David Blume committed on 2018-01-20 20:31:35
Showing 1 changed file, with 110 additions and 272 deletions.
@@ -1,19 +1,18 @@
 #!/usr/bin/python
-# chmod 755 me, and make sure I have UNIX style newlines.
 #
 # techcrunch.py
 #
-# http://feeds.feedburner.com/TechCrunch
-# feed = feedparser.parse( 'http://feeds.feedburner.com/TechCrunch' )
-# feed.entries[14]['feedburner_origlink'], feed.entries[14]['slash_comments']
+# For reference: See the SVN history of this file to see how I implemented
+# 1. retweet counts
+# 2. slash comment counts
+# 3. disqus comment counts, and the cookie for them
+# http://websvn.dlma.com/filedetails.php?repname=private&path=%2Fwww%2Ftechcrunch.dlma.com%2Ftrunk%2Ftechcrunch.py
 #
 # TODO:
 # 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry »</a>'
 #    link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/"
 # 2. Add Reddit counts: curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
 #
-# This file was coverted from tabs to spaces with the vim command %retab
-#
 # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
 #
 
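TODO item 2 above is the natural follow-up to this commit: poll Reddit the way Get_fb_stats polls Facebook. A minimal sketch of what that could look like, in the file's urllib2 style; the function name and the score/num_comments fields are assumptions based on Reddit's public info.json listing format, not code in this repository:

    def get_reddit_counts(url_string):
        """ Sketch: returns (score, num_comments) for the first reddit
            submission of url_string, or (-1, -1) on any failure. """
        query = urllib.urlencode({'url': url_string})
        try:
            f = urllib2.urlopen('http://www.reddit.com/api/info.json?%s' % query)
            data = json.loads(f.read())
            f.close()
        except (urllib2.URLError, httplib.BadStatusLine):
            return -1, -1
        children = data.get('data', {}).get('children', [])
        if not children:
            return -1, -1
        post = children[0]['data']  # a reddit "t3" link object
        return post.get('score', -1), post.get('num_comments', -1)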
@@ -34,10 +33,8 @@ import httplib
 import shutil
 import glob
 import smtplib
-import bisect
 import analysis
 import json
-import cookielib
 import xml
 import texttime
 import operator
@@ -49,6 +46,9 @@ any_entry_added = False
 tags_to_post = set(['apple', 'google'])
 authors_to_post = ['michael arrington',]
 
+rhs_metric = 'fb_shares'
+rhs_metric_times = 'comment_times'
+
 localdir = ''
 
 html_head = """
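The two new module-level names are the point of this commit's renames: downstream code reads item[rhs_metric] and item[rhs_metric_times] instead of hard-coded 'retweets' keys, so the right-hand chart series and the qualifying threshold can be redirected to any other collected metric in one place. For example, to track likes instead of shares (a hypothetical configuration; both keys exist after this commit):

    rhs_metric = 'fb_likes'             # chart and threshold on likes
    rhs_metric_times = 'comment_times'  # fb metrics are sampled when comments are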
@@ -127,48 +127,39 @@ def index_id( a_list, op, elem ):
     return -1
 
 
-def index_id_simple( a_list, elem ):
-    index = 0
-    for item in a_list:
-        if item == elem:
-            return index
-        index += 1
-    return -1
-
-
-def make_chart_url( time_posted, comment_times, comment_values, retweet_times,
-                    retweet_values, threshold_value, is_odd_row, tag_hit ):
-#    comment_times, comment_values = zip( *comments )
-#    retweet_times, retweet_values = zip( *retweets )
+def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times,
+                   rhs_values, threshold_value, is_odd_row, tag_hit):
+#    lhs_times, lhs_values = zip(*comments)
+#    rhs_times, rhs_values = zip(*rhs)
 
     # TODO handle failure cases, -1
 
-    if not len( comment_times ):
-        comment_times = [ time_posted, ]
-    if not len( comment_values ):
-        comment_values = [ 0, ]
-    if not len( retweet_times ):
-        retweet_times = [ time_posted, ]
-    if not len( retweet_values ):
-        retweet_values = [ 0, ]
-
-#    comment_times = [ (i - time_posted + 900) / 1800 for i in comment_times ]
-#    retweet_times = [ (i - time_posted + 900) / 1800 for i in retweet_times ]
-    comment_times = [ (i - time_posted) / 1800 for i in comment_times ]
-    retweet_times = [ (i - time_posted) / 1800 for i in retweet_times ]
-
-    min_comment_time = min( comment_times )
-    max_comment_time = max( comment_times )
-    min_comment_value = min( comment_values )
-    max_comment_value = max( comment_values )
-    min_retweet_time = min( retweet_times )
-    max_retweet_time = max( retweet_times )
-    min_retweet_value = min( retweet_values )
-    max_retweet_value = max( retweet_values )
+    if not len(lhs_times):
+        lhs_times = [time_posted,]
+    if not len(lhs_values):
+        lhs_values = [0,]
+    if not len(rhs_times):
+        rhs_times = [time_posted,]
+    if not len(rhs_values):
+        rhs_values = [0,]
+
+#    lhs_times = [(i - time_posted + 900) / 1800 for i in lhs_times]
+#    rhs_times = [(i - time_posted + 900) / 1800 for i in rhs_times]
+    lhs_times = [(i - time_posted) / 1800 for i in lhs_times]
+    rhs_times = [(i - time_posted) / 1800 for i in rhs_times]
+
+    min_comment_time = min(lhs_times)
+    max_comment_time = max(lhs_times)
+    min_comment_value = min(lhs_values)
+    max_comment_value = max(lhs_values)
+    min_rhs_time = min(rhs_times)
+    max_rhs_time = max(rhs_times)
+    min_rhs_value = min(rhs_values)
+    max_rhs_value = max(rhs_values)
 
     met_threshold_pt = -1
     if threshold_value != -1:
-        met_threshold_pt = index_id( retweet_values, operator.ge, threshold_value )
+        met_threshold_pt = index_id(rhs_values, operator.ge, threshold_value)
     if met_threshold_pt == -1 or tag_hit:
         # This can happen if threshold_value was set to a number
         # because the author or a tag was matched, but the article
@@ -182,30 +173,30 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times,
         bg_color = odd_background
         watermark_color = odd_watermark
 
-    if len( comment_values ) < 8 and len( comment_values ) > 1:
+    if len(lhs_values) < 8 and len(lhs_values) > 1:
 #        max_comment_value *= 2
         pass
-    elif len( comment_values ) == 1:
+    elif len(lhs_values) == 1:
         min_comment_value = 0
-    if len( retweet_values ) < 8 and len( retweet_values ) > 1:
-#        max_retweet_value *= 2
+    if len(rhs_values) < 8 and len(rhs_values) > 1:
+#        max_rhs_value *= 2
         pass
-    elif len( retweet_values ) == 1:
-        min_retweet_value = 0
+    elif len(rhs_values) == 1:
+        min_rhs_value = 0
 
     min_comment_value = 0
-    min_retweet_value = 0
+    min_rhs_value = 0
 
     chart_url = "http://chart.apis.google.com/chart?cht=lxy&chco=%s,%s&chs=%dx%d&chxs=0,%s|1,%s" % \
                 (series_1_color, series_2_color, img_width, img_height, series_1_color, series_2_color)
-    chart_url += "&chd=t:%s|%s|%s|%s" % ( ','.join( [ str( n ) for n in comment_times ] ),
-                                          ','.join( [ str( n ) for n in comment_values ] ),
-                                          ','.join( [ str( n ) for n in retweet_times ] ),
-                                          ','.join( [ str( n ) for n in retweet_values ] ) )
+    chart_url += "&chd=t:%s|%s|%s|%s" % (','.join([str(n) for n in lhs_times]),
+                                         ','.join([str(n) for n in lhs_values]),
+                                         ','.join([str(n) for n in rhs_times]),
+                                         ','.join([str(n) for n in rhs_values]))
     # TODO: Consider watermark levels, like:
     # chm=h,B0B0B0,1,0.3,1|r,E0E0E0,0,0,0.5
-    if max_retweet_value > 0:
-        threshold_percent = max( 0, min( (float(threshold_value) / max_retweet_value) - 0.01, 1.0 ) )
+    if max_rhs_value > 0:
+        threshold_percent = max(0, min((float(threshold_value) / max_rhs_value) - 0.01, 1.0))
     else:
         threshold_percent = 1.0
     chart_url += "&chm=r,%s,0,0,%1.3f" % (watermark_color, threshold_percent)
@@ -218,12 +209,12 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times,
         dot_shape = 'o'
     chart_url += "|%s,%s,1,%d,10" % (dot_shape, dot_color, met_threshold_pt)
     chart_url += "&chxt=y,r&chxl=0:|%d|%d|1:|%d|%d&chds=%d,%d,%d,%d,%d,%d,%d,%d" % \
-                 ( min_comment_value, max_comment_value, min_retweet_value, max_retweet_value,
+                 (min_comment_value, max_comment_value, min_rhs_value, max_rhs_value,
                   0, max(7, max_comment_time),
                   min_comment_value, max_comment_value,
-                  0, max( 7, max_retweet_time ),
-                  min_comment_value, max_retweet_value )
-    chart_url += "&chf=bg,s,%s&chdl=comments|retweets" % ( bg_color, )
+                  0, max(7, max_rhs_time),
+                  min_comment_value, max_rhs_value)
+    chart_url += "&chf=bg,s,%s&chdl=comments|shares" % (bg_color,)
     return chart_url
 
 
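For a concrete picture of what make_chart_url now emits, here is the data portion of a chart URL for one invented article, using the same Google Image Charts parameters as the hunks above (numbers made up for illustration):

    # Two series sampled at half-hour intervals after posting:
    lhs_times, lhs_values = [0, 1, 2, 3], [0, 2, 5, 9]       # comments
    rhs_times, rhs_values = [0, 1, 2, 3], [1, 40, 180, 310]  # fb_shares
    # make_chart_url folds these into the chd parameter as:
    #   &chd=t:0,1,2,3|0,2,5,9|0,1,2,3|1,40,180,310
    # with comments labeled on the left (y) axis, shares on the right (r)
    # axis via chxt=y,r, and the qualifying threshold drawn as a chm range
    # marker over the background.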
@@ -276,8 +267,6 @@ def process_feed( yaml_items ):
     while len(yaml_items) > 200:
         yaml_items.pop()
 
-#    cookie = Get_cookie( urllib2.Request( 'http://mediacdn.disqus.com/1078/build/system/count.js' ) )
-
     for i in yaml_items:
 #        i['title'] = asciiize(i['title'])
 #        i['tags'] = map(asciiize, i['tags'])
@@ -286,7 +275,7 @@ def process_feed( yaml_items ):
     else:
         if hasattr(feed, 'bozo_exception'):
             e = feed.bozo_exception
-            if isinstance( e, urllib2.URLError ): # e.__class__ == urllib2.URLError: # and hasattr(e, 'errno') and e.errno == 110:
+            if isinstance(e, urllib2.URLError):
                 print_last_line = True
                 if hasattr(e, 'reason'):
                     if e.reason[0] == 110:
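The magic number 110 is ETIMEDOUT on Linux: feedparser doesn't raise on network failure, it stashes the exception in feed.bozo_exception, and e.reason[0] is the errno of the underlying socket error. The same test written against the errno module, as a readability sketch with behavior assumed identical:

    import errno

    def feed_timed_out(bozo_exception):
        """ True when feedparser's bozo_exception wraps a socket timeout. """
        if isinstance(bozo_exception, urllib2.URLError) and hasattr(bozo_exception, 'reason'):
            return bozo_exception.reason[0] == errno.ETIMEDOUT  # 110 on Linux
        return False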
@@ -354,12 +343,11 @@ def process_item( feed_item, yaml_items ):
                  'orig_posted'         : timecode_parsed,
                  'qualified'           : -1,
                  'comment_times'       : [],
-                 'comments'            : [],
+                 'fb_comments'         : [],
                  'fb_shares'           : [],
+                 'fb_likes'            : [],
                  'slash_comment_times' : [],
-                 'slash_comments'      : [],
-                 'retweet_times'       : [],
-                 'retweets'            : []
+                 'slash_comments'      : []
                }
     if hasattr(feed_item, 'tags'):
         for i in feed_item.tags:
@@ -383,205 +371,53 @@ def process_yaml_item( yaml_item ):
     global any_entry_added
 
     timecode_now = int(time.time())
-    if len( yaml_item['comments'] ) < 8:
-        num_shares, num_comments = Get_fb_stats( yaml_item['link'] )
-#        disqus_id = Get_disqus_id( yaml_item )
-#        num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie )
+    if len(yaml_item['fb_comments']) < 8:
+        num_shares, num_comments, num_likes = Get_fb_stats(yaml_item['link'])
         if num_comments != -1:
             any_entry_added = True
             yaml_item['comment_times'].append(timecode_now)
-            yaml_item['comments'].append( num_comments )
             yaml_item['fb_shares'].append(num_shares)
+            yaml_item['fb_comments'].append(num_comments)
+            yaml_item['fb_likes'].append(num_likes)
 
-    if len( yaml_item['retweets'] ) < 8:
-        num_retweets = Get_num_retweets( yaml_item )
-        if num_retweets != -1:
-            any_entry_added = True
-            yaml_item['retweet_times'].append( timecode_now )
-            yaml_item['retweets'].append( num_retweets )
+#    if len(yaml_item['reddit_']) < 8:
+#        num_ = Get_reddit_stats(yaml_item['link'])
+#        if num_ != -1:
+#            any_entry_added = True
+#            yaml_item['reddit_times'].append(timecode_now)
+#            yaml_item['reddit_'].append(num_)
 
 
-def Get_num_comments( url_string ):
-    try:
-        f = urllib2.urlopen( url_string )
-        data = f.read()
-        f.close()
-    except urllib2.URLError, e:
-        if hasattr( e, 'reason' ):
-            print "Get_num_comments got an error:", e.reason
-        elif hasattr( e, 'code' ):
-            print "Get_num_comments got an error. Code:", e.code
-        return -1
-    tag_to_find = '<a href="#comments" rel="nofollow">'
-    offset = data.find( tag_to_find )
-    if offset != -1:
-        start_pos = offset + len( tag_to_find )
-        end_pos = start_pos
-        while str.isdigit( data[ end_pos ] ):
-            end_pos += 1
-        if end_pos > start_pos:
-            return int( data[start_pos:end_pos] )
+def Get_reddit_stats(url_string):
+    """ Consider curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
+    """
     return -1
 
 
-def Get_cookie( cookie_request ):
-    cookie = cookielib.CookieJar()
-    error_string = "Get_cookie didn't."
-    try:
-        cookie_response = urllib2.urlopen( cookie_request )
-        cookie.extract_cookies( cookie_response, cookie_request )
-        return cookie
-    except urllib2.URLError, e:
-        if hasattr( e, 'reason' ):
-            error_string = "Get_cookie got an error: %s" % ( str( e.reason ) )
-        elif hasattr( e, 'code' ):
-            error_string = "Get_cookie got an error. Code: %s" % ( str( e.code ) )
-    print error_string
-    return None
-
+def Get_fb_stats(url_string):
+    """ There are apparently two pretty good ways to do this. One, with FQL, querying for the parameters you want,
+    and two, with URL id. They go like this:
 
-def Get_disqus_id( yaml_item ):
-    if 'disqus_id' in yaml_item:
-        return yaml_item['disqus_id']
-    url_get_data = ''
-    try:
-        f = urllib2.urlopen( yaml_item['link'] )
-        data = f.read()
-        f.close()
-    except urllib2.URLError, e:
-        if hasattr( e, 'reason' ):
-            print "Get_disqus_id got an error:", e.reason
-        elif hasattr( e, 'code' ):
-            print "Get_disqus_id got an error. Code:", e.code, yaml_item['link']
-        return url_get_data
-    except httplib.BadStatusLine, e:
-        print "Get_discus_id got a BadStatusLine:", str( e )
-        return url_get_data
-
-    tag_to_find = '<a href="#comments" rel="nofollow"><span class="dsq-postid" rel="'
-    offset = data.find( tag_to_find )
-    if offset != -1:
-        start_pos = offset + len( tag_to_find )
-        end_pos = start_pos
-        while data[ end_pos ] != '"' and end_pos < start_pos + 200:
-            end_pos += 1
-        if end_pos < start_pos + 200:
-            url_get_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' )
-            yaml_item['disqus_id'] = url_get_data
-#    else:
-#        print "Get_disqus_id could not find #comments anchor for", yaml_item['link']
-    return url_get_data
-
-
-def Get_num_disqus_comments( url_string, disqus_id, cookie ):
-
-    if cookie == None or disqus_id == '':
-        return -1
+    FQL:
 
-    opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cookie ) )
-    request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + disqus_id )
-    try:
-        response = opener.open( request )
-        disqus_data = response.read()
-    except urllib2.URLError, e:
-        if hasattr( e, 'reason' ):
-            print "Get_num_disqus_comments got an error getting the count:", e.reason
-        elif hasattr( e, 'code' ):
-            print "Get_num_disqus_comments got an error getting the count. Code:", e.code
-        return -1
-    disqus_tag_to_find = 'displayCount('
-    disqus_offset = disqus_data.find( disqus_tag_to_find )
-    if disqus_offset != -1:
-        start_pos = disqus_offset + len( disqus_tag_to_find )
-        end_pos = disqus_data.find( '}]})', start_pos )
-        if end_pos != -1:
-            return int( json.loads( disqus_data[start_pos:end_pos+3] )['counts'][0]['comments'] )
-    else:
-        print "Get_num_disqus_comments found no disqus tag for", url_string
-    return -1
+    u = urllib.quote_plus(url_string)
+    urllib2.urlopen('https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27' % (u))
 
+    URL ID:
 
-def Get_num_retweets_unused( yaml_item ):
-    """ TODO: Support for retweents has been removed.
-    See: https://twittercommunity.com/t/a-new-design-for-tweet-and-follow-buttons/52791
-    So instead, use facebook.
-    curl https://graph.facebook.com/fql?q=SELECT%20total_count,comment_count,like_count,share_count%20FROM%20link_stat%20WHERE%20url=%27http://techcrunch.com/2015/11/22/the-real-reason-on-demand-startups-are-reclassifying-workers/?ncid=rss%27
+    u = urllib.quote_plus(url_string)
+    with open('/home/dblume/oauth.dlma.com/facebook-token.txt', 'r') as f:
+        token = f.read()
+    encoded = urllib.urlencode({'access_token': token})
+    urllib2.urlopen('https://graph.facebook.com/v2.5/?id=%s&%s' % (u, encoded))
     """
-    url_string = yaml_item['link']
-    try:
-        f = urllib2.urlopen( 'http://api.tweetmeme.com/button.js?url=%s' % ( url_string ) )
-        data = f.read()
-        f.close()
-    except urllib2.URLError, e:
-        if hasattr( e, 'reason' ):
-            print "Get_num_retweets got an error:", e.reason
-        elif hasattr( e, 'code' ):
-            print "Get_num_retweets got an error. Code:", e.code
-        return -1
-    tag_to_find = '<span class="c">'
-    offset = data.find( tag_to_find )
-    if offset != -1:
-        start_pos = offset + len( tag_to_find )
-        end_pos = data.find( '<', start_pos )
-        if end_pos != -1:
-            try:
-                return int( data[ start_pos:end_pos ] )
-            except ValueError, e:
-                if data[ start_pos:end_pos ] != '?':
-                    print "Get_num_retweets expected a number but got \"%s\"" % ( data[ start_pos:end_pos ], )
-                else:
-                    print "Get_num_retweets got '?' for \"%s...\", posted %s ago." % \
-                        ( yaml_item['title'][:20],
-                          texttime.stringify( timedelta( seconds = time.time() - yaml_item['orig_posted'] ) )
-                        )
-    return -1
-
-
-def Get_num_retweets( yaml_item ):
-    """ TODO: Support for retweents has been removed.
-    See: https://twittercommunity.com/t/a-new-design-for-tweet-and-follow-buttons/52791
-    So instead, use facebook.
-    curl https://graph.facebook.com/fql?q=SELECT%20total_count,comment_count,like_count,share_count%20FROM%20link_stat%20WHERE%20url=%27http://techcrunch.com/2015/11/22/the-real-reason-on-demand-startups-are-reclassifying-workers/?ncid=rss%27
-    """
-    url_string = yaml_item['link']
-    try:
-        f = urllib2.urlopen( 'http://urls.api.twitter.com/1/urls/count.json?url=%s&callback=twttr.receiveCount' % \
-                             urllib.quote_plus( url_string ) )
-        data = f.read()
-        f.close()
-    except (urllib2.URLError, httplib.BadStatusLine), e:
-        if hasattr( e, 'reason' ):
-            print "Get_num_retweets got an error:", e.reason
-        elif hasattr( e, 'code' ):
-            print "Get_num_retweets got an error. Code:", e.code
-        else:
-            print "Get_num_retweets got an error:", str( e )
-        return -1
-    tag_to_find = '"count":'
-    offset = data.find( tag_to_find )
-    if offset != -1:
-        start_pos = offset + len( tag_to_find )
-        end_pos = data.find( ',', start_pos )
-        if end_pos != -1:
-            try:
-                return int( data[ start_pos:end_pos ] )
-            except ValueError, e:
-                if data[ start_pos:end_pos ] != '?':
-                    print "Get_num_retweets expected a number but got \"%s\"" % ( data[ start_pos:end_pos ], )
-                else:
-                    print "Get_num_retweets got '?' for \"%s...\", posted %s ago." % \
-                        ( yaml_item['title'][:20],
-                          texttime.stringify( timedelta( seconds = time.time() - yaml_item['orig_posted'] ) )
-                        )
-    return -1
-
-
-def Get_fb_stats( url_string ):
-    """ Returns shares and comments """
     shares = -1
     comments = -1
+    likes = -1
+
     try:
-        f = urllib2.urlopen( 'https://graph.facebook.com/?ids=' + url_string )
+        url = 'https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27'
+        f = urllib2.urlopen(url % (urllib.quote_plus(url_string)))
         data = f.read()
         f.close()
     except (urllib2.URLError, httplib.BadStatusLine), e:
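The new docstring describes two Graph API approaches, but the body only implements the FQL one. A sketch of the second ("URL ID") approach as its own function, assembled from the docstring's own lines; the response shape (a 'share' dict carrying share_count and comment_count) is an assumption based on Graph API v2.5 behavior, not verified in this repository:

    def Get_fb_stats_by_id(url_string):
        """ Sketch of the docstring's URL-ID approach: (shares, comments) or (-1, -1). """
        with open('/home/dblume/oauth.dlma.com/facebook-token.txt', 'r') as f:
            token = f.read().strip()
        encoded = urllib.urlencode({'access_token': token})
        u = urllib.quote_plus(url_string)
        try:
            f = urllib2.urlopen('https://graph.facebook.com/v2.5/?id=%s&%s' % (u, encoded))
            data = json.loads(f.read())
            f.close()
        except (urllib2.URLError, httplib.BadStatusLine):
            return -1, -1
        share = data.get('share', {})  # field name assumed from Graph v2.5
        return share.get('share_count', 0), share.get('comment_count', 0)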
@@ -591,20 +427,24 @@ def Get_fb_stats( url_string ):
             print "Get_fb_stats got an error. Code:", e.code, url_string
         else:
             print "Get_fb_stats got an error:", str(e)
-        return -1, -1
-    if len( data ) > len( url_string ):
-        d = json.loads( data ).values()[0]
-        if d.has_key( 'shares' ):
-            shares = d['shares']
+        return shares, comments, likes
+    if len(data) > 20:
+        d = json.loads(data)['data'][0]
+        if 'like_count' in d:
+            likes = d['like_count']
         else:
-            shares = 0
-        if d.has_key( 'comments' ):
-            comments = d['comments']
+            likes = 0
+        if 'comment_count' in d:
+            comments = d['comment_count']
         else:
             comments = 0
+        if 'share_count' in d:
+            shares = d['share_count']
+        else:
+            shares = 0
     else:
         print "Get_fb_stats got too little data for ", url_string
-    return shares, comments
+    return shares, comments, likes
 
 
 def Save_image(url_string, file_path):
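The len(data) > 20 guard and json.loads(data)['data'][0] above follow from the shape of a link_stat FQL response, which looks roughly like this (values invented for illustration):

    {"data": [{"total_count": 354,
               "comment_count": 5,
               "like_count": 19,
               "share_count": 330}]}

An empty result still serializes as {"data": []}, only 11 characters, so the 20-character floor cheaply rejects empty or error responses before indexing into the list.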
@@ -655,15 +495,15 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ):
             tag_hit = True
         chart_url = make_chart_url(image['orig_posted'],
                                    image['comment_times'],
-                                   image['comments'],
-                                   image['retweet_times'],
-                                   image['retweets'],
+                                   image['fb_comments'],
+                                   image[rhs_metric_times],
+                                   image[rhs_metric],
                                    image['qualified'],
                                    image_index % 2,
                                    tag_hit
                                   )
 #        if image['title'].startswith( 'Too ' ):
-#            print image['title'], image['qualified'], image['retweet_times']
+#            print image['title'], image['qualified'], image['rhs_times']
 #            print chart_url
         image_url = Save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index)))
         f.write('<tr valign="center" class="%s">\n    <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \
@@ -727,12 +567,11 @@ if __name__=='__main__':
     #      'tags' : [ u'Google', u'privacy' ]
     #      'qualified' : -1
     #      'comment_times' : [ 1282197199, 1282197407 ]
-    #      'comments' : [ 0, 15 ]
-    #      'fb_shares' : [ 0, 3 ]
+    #      'fb_comments' : [ 0, 5 ]
+    #      'fb_shares' : [ 0, 300 ]
+    #      'fb_likes' : [ 0, 19 ]
     #      'slash_comment_times' : [ 1282197199, 1282197407 ]
     #      'slash_comments' : [ 0, 5 ]
-    #      'retweet_times' : [ 1282197199, 1282197407 ]
-    #      'retweets' : [ 0, 43 ]
     #    },
     #    { ... }
     #  ]
@@ -757,8 +596,7 @@ if __name__=='__main__':
     # If any work was done, then write files.
     #
     if any_entry_added:
-
-        weekend_stats, weekday_stats = analysis.Process_retweets_for_feed( items )
+        weekend_stats, weekday_stats = analysis.Process_feed(items, rhs_metric, rhs_metric_times)
 
         # We'll only look at the stats for the time 1:00 to 1:30 after posting.
         weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
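analysis.py isn't part of this diff, but the call sites pin down the contract: Process_feed(items, rhs_metric, rhs_metric_times) returns weekend and weekday lists of (median, mean, sigma) tuples indexed by half-hour window after posting, which is why weekend_stats[2] covers 1:00 to 1:30. A sketch of that contract; the bucketing below is an assumption, not the repository's analysis code:

    import math

    def Process_feed(items, metric, metric_times):
        """ Sketch: returns (weekend_stats, weekday_stats), each a list of
            (median, mean, sigma) per 30-minute window after posting. """
        buckets = [[] for _ in range(8)]  # windows 0:00-0:30 ... 3:30-4:00
        for item in items:
            for t, v in zip(item[metric_times], item[metric]):
                w = (t - item['orig_posted']) // 1800
                if 0 <= w < len(buckets):
                    buckets[w].append(v)
        stats = []
        for b in buckets:
            if not b:
                stats.append((0, 0.0, 0.0))
                continue
            mean = float(sum(b)) / len(b)
            sigma = math.sqrt(sum((x - mean) ** 2 for x in b) / len(b))
            stats.append((sorted(b)[len(b) // 2], mean, sigma))
        return stats, stats  # the real code splits weekend vs. weekday posts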
@@ -772,17 +610,17 @@ if __name__=='__main__':
             else:
                 threshold = weekday_threshold
             if item['qualified'] == -1:
-                for i in range( len( item['retweet_times'] ) ):
-                    r_time = item['retweet_times'][i]
+                for i in range(len(item[rhs_metric_times])):
+                    r_time = item[rhs_metric_times][i]
                     if r_time - item['orig_posted'] < 5400:
-                        if item['retweets'][i] >= threshold:
+                        if item[rhs_metric][i] >= threshold:
                             item['qualified'] = threshold
                     if r_time - item['orig_posted'] >= 3600:
                         break
 
     # Automatically add those items whose authors and tags I like
     for item in items:
-        if item['qualified'] == -1 and len( item['retweet_times'] ) > 0:
+        if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0:
             if item['author'].lower() in authors_to_post:
                 item['qualified'] = threshold
             elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0:
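Concretely, the qualification loop above says: an item qualifies as soon as any right-hand sample taken within 90 minutes (5400 s) of posting meets the threshold, and the loop stops after the first sample that is an hour old (3600 s). A worked trace with invented numbers:

    # threshold = 100; samples as (seconds since posting, rhs value):
    #   (1800, 80)  -> within 5400 s but 80 < 100; 1800 < 3600, keep looping
    #   (3900, 120) -> within 5400 s and 120 >= 100: qualified = 100;
    #                  3900 >= 3600, so the loop breaks
    #   (7200, 500) -> never examined; late spikes don't qualify an item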