David Blume committed on 2018-01-20 20:31:35
Showing 1 changed file, with 110 additions and 272 deletions.
... | ... |
@@ -1,19 +1,18 @@ |
1 | 1 |
#!/usr/bin/python |
2 |
-# chmod 755 me, and make sure I have UNIX style newlines. |
|
3 | 2 |
# |
4 | 3 |
# techcrunch.py |
5 | 4 |
# |
6 |
-# http://feeds.feedburner.com/TechCrunch |
|
7 |
-# feed = feedparser.parse( 'http://feeds.feedburner.com/TechCrunch' ) |
|
8 |
-# feed.entries[14]['feedburner_origlink'], feed.entries[14]['slash_comments'] |
|
5 |
+# For reference: See the SVN history of this file to see how I implemented |
|
6 |
+# 1. retweet counts |
|
7 |
+# 2. slash comment counts |
|
8 |
+# 3. disqus comment counts, and the cookie for them |
|
9 |
+# http://websvn.dlma.com/filedetails.php?repname=private&path=%2Fwww%2Ftechcrunch.dlma.com%2Ftrunk%2Ftechcrunch.py |
|
9 | 10 |
# |
10 | 11 |
# TODO: |
11 | 12 |
# 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry »</a>' |
12 | 13 |
# link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/" |
13 | 14 |
# 2. Add Reddit counts: curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg" |
14 | 15 |
# |
15 |
-# This file was coverted from tabs to spaces with the vim command %retab |
|
16 |
-# |
|
17 | 16 |
# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
18 | 17 |
# |
19 | 18 |
|
... | ... |
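Regarding TODO item 1 in the hunk above (deep links): a minimal sketch of pulling the deep link out of the "post_more_link" anchor quoted in the comment, using a regular expression. The anchor class and markup come from the TODO's own example; the helper name is illustrative only, not something this commit adds.

import re

def extract_deep_link(entry_html):
    # Find the "Read the rest of this entry" anchor from the TODO example
    # and return its href, or None if the post has no such link.
    m = re.search(r'<a class="post_more_link snap_nopreview" href="([^"]+)"',
                  entry_html)
    if m:
        return m.group(1)
    return None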
@@ -34,10 +33,8 @@ import httplib |
34 | 33 |
import shutil |
35 | 34 |
import glob |
36 | 35 |
import smtplib |
37 |
-import bisect |
|
38 | 36 |
import analysis |
39 | 37 |
import json |
40 |
-import cookielib |
|
41 | 38 |
import xml |
42 | 39 |
import texttime |
43 | 40 |
import operator |
... | ... |
@@ -49,6 +46,9 @@ any_entry_added = False |
49 | 46 |
tags_to_post = set(['apple', 'google']) |
50 | 47 |
authors_to_post = ['michael arrington',] |
51 | 48 |
|
49 |
+rhs_metric = 'fb_shares' |
|
50 |
+rhs_metric_times = 'comment_times' |
|
51 |
+ |
|
52 | 52 |
localdir = '' |
53 | 53 |
|
54 | 54 |
html_head = """ |
... | ... |
@@ -127,48 +127,39 @@ def index_id( a_list, op, elem ): |
127 | 127 |
return -1 |
128 | 128 |
|
129 | 129 |
|
130 |
-def index_id_simple( a_list, elem ): |
|
131 |
- index = 0 |
|
132 |
- for item in a_list: |
|
133 |
- if item == elem: |
|
134 |
- return index |
|
135 |
- index += 1 |
|
136 |
- return -1 |
|
137 |
- |
|
138 |
- |
|
139 |
-def make_chart_url( time_posted, comment_times, comment_values, retweet_times, |
|
140 |
- retweet_values, threshold_value, is_odd_row, tag_hit ): |
|
141 |
-# comment_times, comment_values = zip( *comments ) |
|
142 |
-# retweet_times, retweet_values = zip( *retweets ) |
|
130 |
+def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
|
131 |
+ rhs_values, threshold_value, is_odd_row, tag_hit): |
|
132 |
+# lhs_times, lhs_values = zip(*comments) |
|
133 |
+# rhs_times, rhs_values = zip(*rhs) |
|
143 | 134 |
|
144 | 135 |
# TODO handle failure cases, -1 |
145 | 136 |
|
146 |
- if not len( comment_times ): |
|
147 |
- comment_times = [ time_posted, ] |
|
148 |
- if not len( comment_values ): |
|
149 |
- comment_values = [ 0, ] |
|
150 |
- if not len( retweet_times ): |
|
151 |
- retweet_times = [ time_posted, ] |
|
152 |
- if not len( retweet_values ): |
|
153 |
- retweet_values = [ 0, ] |
|
154 |
- |
|
155 |
-# comment_times = [ (i - time_posted + 900) / 1800 for i in comment_times ] |
|
156 |
-# retweet_times = [ (i - time_posted + 900) / 1800 for i in retweet_times ] |
|
157 |
- comment_times = [ (i - time_posted) / 1800 for i in comment_times ] |
|
158 |
- retweet_times = [ (i - time_posted) / 1800 for i in retweet_times ] |
|
159 |
- |
|
160 |
- min_comment_time = min( comment_times ) |
|
161 |
- max_comment_time = max( comment_times ) |
|
162 |
- min_comment_value = min( comment_values ) |
|
163 |
- max_comment_value = max( comment_values ) |
|
164 |
- min_retweet_time = min( retweet_times ) |
|
165 |
- max_retweet_time = max( retweet_times ) |
|
166 |
- min_retweet_value = min( retweet_values ) |
|
167 |
- max_retweet_value = max( retweet_values ) |
|
137 |
+ if not len(lhs_times): |
|
138 |
+ lhs_times = [time_posted,] |
|
139 |
+ if not len(lhs_values): |
|
140 |
+ lhs_values = [0,] |
|
141 |
+ if not len(rhs_times): |
|
142 |
+ rhs_times = [time_posted,] |
|
143 |
+ if not len(rhs_values): |
|
144 |
+ rhs_values = [0,] |
|
145 |
+ |
|
146 |
+# lhs_times = [(i - time_posted + 900) / 1800 for i in lhs_times] |
|
147 |
+# rhs_times = [(i - time_posted + 900) / 1800 for i in rhs_times] |
|
148 |
+ lhs_times = [(i - time_posted) / 1800 for i in lhs_times] |
|
149 |
+ rhs_times = [(i - time_posted) / 1800 for i in rhs_times] |
|
150 |
+ |
|
151 |
+ min_comment_time = min(lhs_times) |
|
152 |
+ max_comment_time = max(lhs_times) |
|
153 |
+ min_comment_value = min(lhs_values) |
|
154 |
+ max_comment_value = max(lhs_values) |
|
155 |
+ min_rhs_time = min(rhs_times) |
|
156 |
+ max_rhs_time = max(rhs_times) |
|
157 |
+ min_rhs_value = min(rhs_values) |
|
158 |
+ max_rhs_value = max(rhs_values) |
|
168 | 159 |
|
169 | 160 |
met_threshold_pt = -1 |
170 | 161 |
if threshold_value != -1: |
171 |
- met_threshold_pt = index_id( retweet_values, operator.ge, threshold_value ) |
|
162 |
+ met_threshold_pt = index_id(rhs_values, operator.ge, threshold_value) |
|
172 | 163 |
if met_threshold_pt == -1 or tag_hit: |
173 | 164 |
# This can happen if threshold_value was set to a number |
174 | 165 |
# because the author or a tag was matched, but the article |
... | ... |
@@ -182,30 +173,30 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, |
182 | 173 |
bg_color = odd_background |
183 | 174 |
watermark_color = odd_watermark |
184 | 175 |
|
185 |
- if len( comment_values ) < 8 and len( comment_values ) > 1: |
|
176 |
+ if len(lhs_values) < 8 and len(lhs_values) > 1: |
|
186 | 177 |
# max_comment_value *= 2 |
187 | 178 |
pass |
188 |
- elif len( comment_values ) == 1: |
|
179 |
+ elif len(lhs_values) == 1: |
|
189 | 180 |
min_comment_value = 0 |
190 |
- if len( retweet_values ) < 8 and len( retweet_values ) > 1: |
|
191 |
- # max_retweet_value *= 2 |
|
181 |
+ if len(rhs_values) < 8 and len(rhs_values) > 1: |
|
182 |
+ # max_rhs_value *= 2 |
|
192 | 183 |
pass |
193 |
- elif len( retweet_values ) == 1: |
|
194 |
- min_retweet_value = 0 |
|
184 |
+ elif len(rhs_values) == 1: |
|
185 |
+ min_rhs_value = 0 |
|
195 | 186 |
|
196 | 187 |
min_comment_value = 0 |
197 |
- min_retweet_value = 0 |
|
188 |
+ min_rhs_value = 0 |
|
198 | 189 |
|
199 | 190 |
chart_url = "http://chart.apis.google.com/chart?cht=lxy&chco=%s,%s&chs=%dx%d&chxs=0,%s|1,%s" % \ |
200 | 191 |
(series_1_color, series_2_color, img_width, img_height, series_1_color, series_2_color) |
201 |
- chart_url += "&chd=t:%s|%s|%s|%s" % ( ','.join( [ str( n ) for n in comment_times ] ), |
|
202 |
- ','.join( [ str( n ) for n in comment_values ] ), |
|
203 |
- ','.join( [ str( n ) for n in retweet_times ] ), |
|
204 |
- ','.join( [ str( n ) for n in retweet_values ] ) ) |
|
192 |
+ chart_url += "&chd=t:%s|%s|%s|%s" % (','.join([str(n) for n in lhs_times]), |
|
193 |
+ ','.join([str(n) for n in lhs_values]), |
|
194 |
+ ','.join([str(n) for n in rhs_times]), |
|
195 |
+ ','.join([str(n) for n in rhs_values])) |
|
205 | 196 |
# TODO: Consider watermark levels, like: |
206 | 197 |
# chm=h,B0B0B0,1,0.3,1|r,E0E0E0,0,0,0.5 |
207 |
- if max_retweet_value > 0: |
|
208 |
- threshold_percent = max( 0, min( (float(threshold_value) / max_retweet_value) - 0.01, 1.0 ) ) |
|
198 |
+ if max_rhs_value > 0: |
|
199 |
+ threshold_percent = max(0, min((float(threshold_value) / max_rhs_value) - 0.01, 1.0)) |
|
209 | 200 |
else: |
210 | 201 |
threshold_percent = 1.0 |
211 | 202 |
chart_url += "&chm=r,%s,0,0,%1.3f" % (watermark_color, threshold_percent) |
... | ... |
@@ -218,12 +209,12 @@ def make_chart_url( time_posted, comment_times, comment_values, retweet_times, |
218 | 209 |
dot_shape = 'o' |
219 | 210 |
chart_url += "|%s,%s,1,%d,10" % (dot_shape, dot_color, met_threshold_pt) |
220 | 211 |
chart_url += "&chxt=y,r&chxl=0:|%d|%d|1:|%d|%d&chds=%d,%d,%d,%d,%d,%d,%d,%d" % \ |
221 |
- ( min_comment_value, max_comment_value, min_retweet_value, max_retweet_value, |
|
212 |
+ (min_comment_value, max_comment_value, min_rhs_value, max_rhs_value, |
|
222 | 213 |
0, max(7, max_comment_time), |
223 | 214 |
min_comment_value, max_comment_value, |
224 |
- 0, max( 7, max_retweet_time ), |
|
225 |
- min_comment_value, max_retweet_value ) |
|
226 |
- chart_url += "&chf=bg,s,%s&chdl=comments|retweets" % ( bg_color, ) |
|
215 |
+ 0, max(7, max_rhs_time), |
|
216 |
+ min_comment_value, max_rhs_value) |
|
217 |
+ chart_url += "&chf=bg,s,%s&chdl=comments|shares" % (bg_color,) |
|
227 | 218 |
return chart_url |
228 | 219 |
|
229 | 220 |
|
... | ... |
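A note on the series being renamed in the hunks above: the lhs_*/rhs_* lists feed the Google Image Charts "lxy" type exactly as the old comment/retweet lists did, with one x-list and one y-list per series in chd, one min,max pair per list in chds, and the left/right axes labeled through chxt/chxl. A minimal standalone sketch, with invented values; the endpoint and parameter layout mirror the code above.

lhs_times, lhs_values = [0, 1, 2, 4], [0, 3, 9, 14]      # e.g. comment samples
rhs_times, rhs_values = [0, 1, 2, 4], [0, 40, 120, 300]  # e.g. share samples
chart_url = "http://chart.apis.google.com/chart?cht=lxy&chs=240x96"
# One x-list and one y-list per series, pipe-separated, comma-joined values:
chart_url += "&chd=t:%s|%s|%s|%s" % (','.join(str(n) for n in lhs_times),
                                     ','.join(str(n) for n in lhs_values),
                                     ','.join(str(n) for n in rhs_times),
                                     ','.join(str(n) for n in rhs_values))
# One min,max scaling pair per data list above, in the same order:
chart_url += "&chds=0,7,0,%d,0,7,0,%d" % (max(lhs_values), max(rhs_values))
# Left axis (y) labels the lhs scale, right axis (r) labels the rhs scale:
chart_url += "&chxt=y,r&chxl=0:|0|%d|1:|0|%d" % (max(lhs_values), max(rhs_values))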
@@ -276,8 +267,6 @@ def process_feed( yaml_items ): |
276 | 267 |
while len(yaml_items) > 200: |
277 | 268 |
yaml_items.pop() |
278 | 269 |
|
279 |
-# cookie = Get_cookie( urllib2.Request( 'http://mediacdn.disqus.com/1078/build/system/count.js' ) ) |
|
280 |
- |
|
281 | 270 |
for i in yaml_items: |
282 | 271 |
# i['title'] = asciiize(i['title']) |
283 | 272 |
# i['tags'] = map(asciiize, i['tags']) |
... | ... |
@@ -286,7 +275,7 @@ def process_feed( yaml_items ): |
286 | 275 |
else: |
287 | 276 |
if hasattr(feed, 'bozo_exception'): |
288 | 277 |
e = feed.bozo_exception |
289 |
- if isinstance( e, urllib2.URLError ): # e.__class__ == urllib2.URLError: # and hasattr(e, 'errno') and e.errno == 110: |
|
278 |
+ if isinstance(e, urllib2.URLError): |
|
290 | 279 |
print_last_line = True |
291 | 280 |
if hasattr(e, 'reason'): |
292 | 281 |
if e.reason[0] == 110: |
... | ... |
@@ -354,12 +343,11 @@ def process_item( feed_item, yaml_items ): |
354 | 343 |
'orig_posted' : timecode_parsed, |
355 | 344 |
'qualified' : -1, |
356 | 345 |
'comment_times' : [], |
357 |
- 'comments' : [], |
|
346 |
+ 'fb_comments' : [], |
|
358 | 347 |
'fb_shares' : [], |
348 |
+ 'fb_likes' : [], |
|
359 | 349 |
'slash_comment_times' : [], |
360 |
- 'slash_comments' : [], |
|
361 |
- 'retweet_times' : [], |
|
362 |
- 'retweets' : [] |
|
350 |
+ 'slash_comments' : [] |
|
363 | 351 |
} |
364 | 352 |
if hasattr(feed_item, 'tags'): |
365 | 353 |
for i in feed_item.tags: |
... | ... |
@@ -383,205 +371,53 @@ def process_yaml_item( yaml_item ): |
383 | 371 |
global any_entry_added |
384 | 372 |
|
385 | 373 |
timecode_now = int(time.time()) |
386 |
- if len( yaml_item['comments'] ) < 8: |
|
387 |
- num_shares, num_comments = Get_fb_stats( yaml_item['link'] ) |
|
388 |
-# disqus_id = Get_disqus_id( yaml_item ) |
|
389 |
-# num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie ) |
|
374 |
+ if len(yaml_item['fb_comments']) < 8: |
|
375 |
+ num_shares, num_comments, num_likes = Get_fb_stats(yaml_item['link']) |
|
390 | 376 |
if num_comments != -1: |
391 | 377 |
any_entry_added = True |
392 | 378 |
yaml_item['comment_times'].append(timecode_now) |
393 |
- yaml_item['comments'].append( num_comments ) |
|
394 | 379 |
yaml_item['fb_shares'].append(num_shares) |
380 |
+ yaml_item['fb_comments'].append(num_comments) |
|
381 |
+ yaml_item['fb_likes'].append(num_likes) |
|
395 | 382 |
|
396 |
- if len( yaml_item['retweets'] ) < 8: |
|
397 |
- num_retweets = Get_num_retweets( yaml_item ) |
|
398 |
- if num_retweets != -1: |
|
399 |
- any_entry_added = True |
|
400 |
- yaml_item['retweet_times'].append( timecode_now ) |
|
401 |
- yaml_item['retweets'].append( num_retweets ) |
|
383 |
+# if len(yaml_item['reddit_']) < 8: |
|
384 |
+# num_ = Get_reddit_stats(yaml_item['link']) |
|
385 |
+# if num_ != -1: |
|
386 |
+# any_entry_added = True |
|
387 |
+# yaml_item['reddit_times'].append(timecode_now) |
|
388 |
+# yaml_item['reddit_'].append(num_) |
|
402 | 389 |
|
403 | 390 |
|
404 |
-def Get_num_comments( url_string ): |
|
405 |
- try: |
|
406 |
- f = urllib2.urlopen( url_string ) |
|
407 |
- data = f.read() |
|
408 |
- f.close() |
|
409 |
- except urllib2.URLError, e: |
|
410 |
- if hasattr( e, 'reason' ): |
|
411 |
- print "Get_num_comments got an error:", e.reason |
|
412 |
- elif hasattr( e, 'code' ): |
|
413 |
- print "Get_num_comments got an error. Code:", e.code |
|
414 |
- return -1 |
|
415 |
- tag_to_find = '<a href="#comments" rel="nofollow">' |
|
416 |
- offset = data.find( tag_to_find ) |
|
417 |
- if offset != -1: |
|
418 |
- start_pos = offset + len( tag_to_find ) |
|
419 |
- end_pos = start_pos |
|
420 |
- while str.isdigit( data[ end_pos ] ): |
|
421 |
- end_pos += 1 |
|
422 |
- if end_pos > start_pos: |
|
423 |
- return int( data[start_pos:end_pos] ) |
|
391 |
+def Get_reddit_stats(url_string): |
|
392 |
+ """ Consider curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg" |
|
393 |
+ """ |
|
424 | 394 |
return -1 |
425 | 395 |
|
426 | 396 |
|
427 |
-def Get_cookie( cookie_request ): |
|
428 |
- cookie = cookielib.CookieJar() |
|
429 |
- error_string = "Get_cookie didn't." |
|
430 |
- try: |
|
431 |
- cookie_response = urllib2.urlopen( cookie_request ) |
|
432 |
- cookie.extract_cookies( cookie_response, cookie_request ) |
|
433 |
- return cookie |
|
434 |
- except urllib2.URLError, e: |
|
435 |
- if hasattr( e, 'reason' ): |
|
436 |
- error_string = "Get_cookie got an error: %s" % ( str( e.reason ) ) |
|
437 |
- elif hasattr( e, 'code' ): |
|
438 |
- error_string = "Get_cookie got an error. Code: %s" % ( str( e.code ) ) |
|
439 |
- print error_string |
|
440 |
- return None |
|
441 |
- |
|
397 |
+def Get_fb_stats(url_string): |
|
398 |
+ """ There are apparently two pretty good ways to do this. One, with FQL, querying for the parameters you want, |
|
399 |
+ and two, with URL id. They go like this: |
|
442 | 400 |
|
443 |
-def Get_disqus_id( yaml_item ): |
|
444 |
- if 'disqus_id' in yaml_item: |
|
445 |
- return yaml_item['disqus_id'] |
|
446 |
- url_get_data = '' |
|
447 |
- try: |
|
448 |
- f = urllib2.urlopen( yaml_item['link'] ) |
|
449 |
- data = f.read() |
|
450 |
- f.close() |
|
451 |
- except urllib2.URLError, e: |
|
452 |
- if hasattr( e, 'reason' ): |
|
453 |
- print "Get_disqus_id got an error:", e.reason |
|
454 |
- elif hasattr( e, 'code' ): |
|
455 |
- print "Get_disqus_id got an error. Code:", e.code, yaml_item['link'] |
|
456 |
- return url_get_data |
|
457 |
- except httplib.BadStatusLine, e: |
|
458 |
- print "Get_discus_id got a BadStatusLine:", str( e ) |
|
459 |
- return url_get_data |
|
460 |
- |
|
461 |
- tag_to_find = '<a href="#comments" rel="nofollow"><span class="dsq-postid" rel="' |
|
462 |
- offset = data.find( tag_to_find ) |
|
463 |
- if offset != -1: |
|
464 |
- start_pos = offset + len( tag_to_find ) |
|
465 |
- end_pos = start_pos |
|
466 |
- while data[ end_pos ] != '"' and end_pos < start_pos + 200: |
|
467 |
- end_pos += 1 |
|
468 |
- if end_pos < start_pos + 200: |
|
469 |
- url_get_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' ) |
|
470 |
- yaml_item['disqus_id'] = url_get_data |
|
471 |
-# else: |
|
472 |
-# print "Get_disqus_id could not find #comments anchor for", yaml_item['link'] |
|
473 |
- return url_get_data |
|
474 |
- |
|
475 |
- |
|
476 |
-def Get_num_disqus_comments( url_string, disqus_id, cookie ): |
|
477 |
- |
|
478 |
- if cookie == None or disqus_id == '': |
|
479 |
- return -1 |
|
401 |
+ FQL: |
|
480 | 402 |
|
481 |
- opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cookie ) ) |
|
482 |
- request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + disqus_id ) |
|
483 |
- try: |
|
484 |
- response = opener.open( request ) |
|
485 |
- disqus_data = response.read() |
|
486 |
- except urllib2.URLError, e: |
|
487 |
- if hasattr( e, 'reason' ): |
|
488 |
- print "Get_num_disqus_comments got an error getting the count:", e.reason |
|
489 |
- elif hasattr( e, 'code' ): |
|
490 |
- print "Get_num_disqus_comments got an error getting the count. Code:", e.code |
|
491 |
- return -1 |
|
492 |
- disqus_tag_to_find = 'displayCount(' |
|
493 |
- disqus_offset = disqus_data.find( disqus_tag_to_find ) |
|
494 |
- if disqus_offset != -1: |
|
495 |
- start_pos = disqus_offset + len( disqus_tag_to_find ) |
|
496 |
- end_pos = disqus_data.find( '}]})', start_pos ) |
|
497 |
- if end_pos != -1: |
|
498 |
- return int( json.loads( disqus_data[start_pos:end_pos+3] )['counts'][0]['comments'] ) |
|
499 |
- else: |
|
500 |
- print "Get_num_disqus_comments found no disqus tag for", url_string |
|
501 |
- return -1 |
|
403 |
+ u = urllib.quote_plus(url_string) |
|
404 |
+ urllib2.urlopen('https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27' % (u)) |
|
502 | 405 |
|
406 |
+ URL ID: |
|
503 | 407 |
|
504 |
-def Get_num_retweets_unused( yaml_item ): |
|
505 |
- """ TODO: Support for retweents has been removed. |
|
506 |
- See: https://twittercommunity.com/t/a-new-design-for-tweet-and-follow-buttons/52791 |
|
507 |
- So instead, use facebook. |
|
508 |
- curl https://graph.facebook.com/fql?q=SELECT%20total_count,comment_count,like_count,share_count%20FROM%20link_stat%20WHERE%20url=%27http://techcrunch.com/2015/11/22/the-real-reason-on-demand-startups-are-reclassifying-workers/?ncid=rss%27 |
|
408 |
+ u = urllib.quote_plus(url_string) |
|
409 |
+ with open('/home/dblume/oauth.dlma.com/facebook-token.txt', 'r') as f: |
|
410 |
+ token = f.read() |
|
411 |
+ encoded = urllib.urlencode({'access_token': token}) |
|
412 |
+ urllib2.urlopen('https://graph.facebook.com/v2.5/?id=%s&%s' % (u, encoded)) |
|
509 | 413 |
""" |
510 |
- url_string = yaml_item['link'] |
|
511 |
- try: |
|
512 |
- f = urllib2.urlopen( 'http://api.tweetmeme.com/button.js?url=%s' % ( url_string ) ) |
|
513 |
- data = f.read() |
|
514 |
- f.close() |
|
515 |
- except urllib2.URLError, e: |
|
516 |
- if hasattr( e, 'reason' ): |
|
517 |
- print "Get_num_retweets got an error:", e.reason |
|
518 |
- elif hasattr( e, 'code' ): |
|
519 |
- print "Get_num_retweets got an error. Code:", e.code |
|
520 |
- return -1 |
|
521 |
- tag_to_find = '<span class="c">' |
|
522 |
- offset = data.find( tag_to_find ) |
|
523 |
- if offset != -1: |
|
524 |
- start_pos = offset + len( tag_to_find ) |
|
525 |
- end_pos = data.find( '<', start_pos ) |
|
526 |
- if end_pos != -1: |
|
527 |
- try: |
|
528 |
- return int( data[ start_pos:end_pos ] ) |
|
529 |
- except ValueError, e: |
|
530 |
- if data[ start_pos:end_pos ] != '?': |
|
531 |
- print "Get_num_retweets expected a number but got \"%s\"" % ( data[ start_pos:end_pos ], ) |
|
532 |
- else: |
|
533 |
- print "Get_num_retweets got '?' for \"%s...\", posted %s ago." % \ |
|
534 |
- ( yaml_item['title'][:20], |
|
535 |
- texttime.stringify( timedelta( seconds = time.time() - yaml_item['orig_posted'] ) ) |
|
536 |
- ) |
|
537 |
- return -1 |
|
538 |
- |
|
539 |
- |
|
540 |
-def Get_num_retweets( yaml_item ): |
|
541 |
- """ TODO: Support for retweents has been removed. |
|
542 |
- See: https://twittercommunity.com/t/a-new-design-for-tweet-and-follow-buttons/52791 |
|
543 |
- So instead, use facebook. |
|
544 |
- curl https://graph.facebook.com/fql?q=SELECT%20total_count,comment_count,like_count,share_count%20FROM%20link_stat%20WHERE%20url=%27http://techcrunch.com/2015/11/22/the-real-reason-on-demand-startups-are-reclassifying-workers/?ncid=rss%27 |
|
545 |
- """ |
|
546 |
- url_string = yaml_item['link'] |
|
547 |
- try: |
|
548 |
- f = urllib2.urlopen( 'http://urls.api.twitter.com/1/urls/count.json?url=%s&callback=twttr.receiveCount' % \ |
|
549 |
- urllib.quote_plus( url_string ) ) |
|
550 |
- data = f.read() |
|
551 |
- f.close() |
|
552 |
- except (urllib2.URLError, httplib.BadStatusLine), e: |
|
553 |
- if hasattr( e, 'reason' ): |
|
554 |
- print "Get_num_retweets got an error:", e.reason |
|
555 |
- elif hasattr( e, 'code' ): |
|
556 |
- print "Get_num_retweets got an error. Code:", e.code |
|
557 |
- else: |
|
558 |
- print "Get_num_retweets got an error:", str( e ) |
|
559 |
- return -1 |
|
560 |
- tag_to_find = '"count":' |
|
561 |
- offset = data.find( tag_to_find ) |
|
562 |
- if offset != -1: |
|
563 |
- start_pos = offset + len( tag_to_find ) |
|
564 |
- end_pos = data.find( ',', start_pos ) |
|
565 |
- if end_pos != -1: |
|
566 |
- try: |
|
567 |
- return int( data[ start_pos:end_pos ] ) |
|
568 |
- except ValueError, e: |
|
569 |
- if data[ start_pos:end_pos ] != '?': |
|
570 |
- print "Get_num_retweets expected a number but got \"%s\"" % ( data[ start_pos:end_pos ], ) |
|
571 |
- else: |
|
572 |
- print "Get_num_retweets got '?' for \"%s...\", posted %s ago." % \ |
|
573 |
- ( yaml_item['title'][:20], |
|
574 |
- texttime.stringify( timedelta( seconds = time.time() - yaml_item['orig_posted'] ) ) |
|
575 |
- ) |
|
576 |
- return -1 |
|
577 |
- |
|
578 |
- |
|
579 |
-def Get_fb_stats( url_string ): |
|
580 |
- """ Returns shares and comments """ |
|
581 | 414 |
shares = -1 |
582 | 415 |
comments = -1 |
416 |
+ likes = -1 |
|
417 |
+ |
|
583 | 418 |
try: |
584 |
- f = urllib2.urlopen( 'https://graph.facebook.com/?ids=' + url_string ) |
|
419 |
+ url = 'https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27' |
|
420 |
+ f = urllib2.urlopen(url % (urllib.quote_plus(url_string))) |
|
585 | 421 |
data = f.read() |
586 | 422 |
f.close() |
587 | 423 |
except (urllib2.URLError, httplib.BadStatusLine), e: |
... | ... |
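The Get_reddit_stats stub added in the hunk above only records the curl hint from the TODO. For reference, a minimal sketch of what that lookup could become, assuming reddit's public info.json listing shape (data -> children -> data -> score) and keeping this script's convention of returning -1 on error; the helper name is illustrative and not part of the commit.

import json
import urllib
import urllib2

def get_reddit_score(url_string):
    # Sum the scores of any reddit posts that link to url_string.
    # The listing shape is an assumption about reddit's public API,
    # not something this commit exercises.
    try:
        f = urllib2.urlopen('http://www.reddit.com/api/info.json?url=' +
                            urllib.quote_plus(url_string))
        listing = json.loads(f.read())
        f.close()
    except (urllib2.URLError, ValueError):
        return -1
    children = listing.get('data', {}).get('children', [])
    return sum(child['data'].get('score', 0) for child in children)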
@@ -591,20 +427,24 @@ def Get_fb_stats( url_string ): |
591 | 427 |
print "Get_fb_stats got an error. Code:", e.code, url_string |
592 | 428 |
else: |
593 | 429 |
print "Get_fb_stats got an error:", str(e) |
594 |
- return -1, -1 |
|
595 |
- if len( data ) > len( url_string ): |
|
596 |
- d = json.loads( data ).values()[0] |
|
597 |
- if d.has_key( 'shares' ): |
|
598 |
- shares = d['shares'] |
|
430 |
+ return shares, comments, likes |
|
431 |
+ if len(data) > 20: |
|
432 |
+ d = json.loads(data)['data'][0] |
|
433 |
+ if 'like_count' in d: |
|
434 |
+ likes = d['like_count'] |
|
599 | 435 |
else: |
600 |
- shares = 0 |
|
601 |
- if d.has_key( 'comments' ): |
|
602 |
- comments = d['comments'] |
|
436 |
+ likes = 0 |
|
437 |
+ if 'comment_count' in d: |
|
438 |
+ comments = d['comment_count'] |
|
603 | 439 |
else: |
604 | 440 |
comments = 0 |
441 |
+ if 'share_count' in d: |
|
442 |
+ shares = d['share_count'] |
|
443 |
+ else: |
|
444 |
+ shares = 0 |
|
605 | 445 |
else: |
606 | 446 |
print "Get_fb_stats got too little data for ", url_string |
607 |
- return shares, comments |
|
447 |
+ return shares, comments, likes |
|
608 | 448 |
|
609 | 449 |
|
610 | 450 |
def Save_image(url_string, file_path): |
... | ... |
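Get_fb_stats above takes the docstring's first (FQL) route. For comparison, a sketch of the second ("URL id") route the docstring describes; the token path is the one shown in the docstring, while the response fields (a "share" object carrying "share_count" and "comment_count") are an assumption about the Graph API v2.5 response, so treat this as illustrative only.

import json
import urllib
import urllib2

def get_fb_stats_by_id(url_string,
                       token_path='/home/dblume/oauth.dlma.com/facebook-token.txt'):
    # Returns (shares, comments), or (-1, -1) on error, matching the
    # conventions used by Get_fb_stats above.
    with open(token_path, 'r') as f:
        token = f.read().strip()
    params = urllib.urlencode({'id': url_string, 'access_token': token})
    try:
        f = urllib2.urlopen('https://graph.facebook.com/v2.5/?' + params)
        d = json.loads(f.read())
        f.close()
    except (urllib2.URLError, ValueError):
        return -1, -1
    share = d.get('share', {})  # field names assumed, see note above
    return share.get('share_count', 0), share.get('comment_count', 0)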
@@ -655,15 +495,15 @@ def Make_index_html( yaml_items, weekend_stats, weekday_stats ): |
655 | 495 |
tag_hit = True |
656 | 496 |
chart_url = make_chart_url(image['orig_posted'], |
657 | 497 |
image['comment_times'], |
658 |
- image['comments'], |
|
659 |
- image['retweet_times'], |
|
660 |
- image['retweets'], |
|
498 |
+ image['fb_comments'], |
|
499 |
+ image[rhs_metric_times], |
|
500 |
+ image[rhs_metric], |
|
661 | 501 |
image['qualified'], |
662 | 502 |
image_index % 2, |
663 | 503 |
tag_hit |
664 | 504 |
) |
665 | 505 |
# if image['title'].startswith( 'Too ' ): |
666 |
-# print image['title'], image['qualified'], image['retweet_times'] |
|
506 |
+# print image['title'], image['qualified'], image['rhs_times'] |
|
667 | 507 |
# print chart_url |
668 | 508 |
image_url = Save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
669 | 509 |
f.write('<tr valign="center" class="%s">\n <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \ |
... | ... |
@@ -727,12 +567,11 @@ if __name__=='__main__': |
727 | 567 |
# 'tags' : [ u'Google', u'privacy' ] |
728 | 568 |
# 'qualified' : -1 |
729 | 569 |
# 'comment_times' : [ 1282197199, 1282197407 ] |
730 |
- # 'comments' : [ 0, 15 ] |
|
731 |
- # 'fb_shares' : [ 0, 3 ] |
|
570 |
+ # 'fb_comments' : [ 0, 5 ] |
|
571 |
+ # 'fb_shares' : [ 0, 300 ] |
|
572 |
+ # 'fb_likes' : [ 0, 19 ] |
|
732 | 573 |
# 'slash_comment_times' : [ 1282197199, 1282197407 ] |
733 | 574 |
# 'slash_comments' : [ 0, 5 ] |
734 |
- # 'retweet_times' : [ 1282197199, 1282197407 ] |
|
735 |
- # 'retweets' : [ 0, 43 ] |
|
736 | 575 |
# }, |
737 | 576 |
# { ... } |
738 | 577 |
# ] |
... | ... |
@@ -757,8 +596,7 @@ if __name__=='__main__': |
757 | 596 |
# If any work was done, then write files. |
758 | 597 |
# |
759 | 598 |
if any_entry_added: |
760 |
- |
|
761 |
- weekend_stats, weekday_stats = analysis.Process_retweets_for_feed( items ) |
|
599 |
+ weekend_stats, weekday_stats = analysis.Process_feed(items, rhs_metric, rhs_metric_times) |
|
762 | 600 |
|
763 | 601 |
# We'll only look at the stats for the time 1:00 to 1:30 after posting. |
764 | 602 |
weekend_median, weekend_mean, weekend_sigma = weekend_stats[2] |
... | ... |
@@ -772,17 +610,17 @@ if __name__=='__main__': |
772 | 610 |
else: |
773 | 611 |
threshold = weekday_threshold |
774 | 612 |
if item['qualified'] == -1: |
775 |
- for i in range( len( item['retweet_times'] ) ): |
|
776 |
- r_time = item['retweet_times'][i] |
|
613 |
+ for i in range(len(item[rhs_metric_times])): |
|
614 |
+ r_time = item[rhs_metric_times][i] |
|
777 | 615 |
if r_time - item['orig_posted'] < 5400: |
778 |
- if item['retweets'][i] >= threshold: |
|
616 |
+ if item[rhs_metric][i] >= threshold: |
|
779 | 617 |
item['qualified'] = threshold |
780 | 618 |
if r_time - item['orig_posted'] >= 3600: |
781 | 619 |
break |
782 | 620 |
|
783 | 621 |
# Automatically add those items whose authors and tags I like |
784 | 622 |
for item in items: |
785 |
- if item['qualified'] == -1 and len( item['retweet_times'] ) > 0: |
|
623 |
+ if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0: |
|
786 | 624 |
if item['author'].lower() in authors_to_post: |
787 | 625 |
item['qualified'] = threshold |
788 | 626 |
elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0: |
789 | 627 |