David Blume committed on 2018-01-20 20:14:24
Showing 1 changed file, with 29 additions and 17 deletions.
| ... | ... |
@@ -312,7 +312,8 @@ def process_yaml_item( yaml_item, cookie ): |
| 312 | 312 |
|
| 313 | 313 |
timecode_now = int( time.time() ) |
| 314 | 314 |
if len( yaml_item['comments'] ) < 8: |
| 315 |
- num_comments = Get_num_disqus_comments( yaml_item['link'], cookie ) |
|
| 315 |
+ disqus_id = Get_disqus_id( yaml_item ) |
|
| 316 |
+ num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie ) |
|
| 316 | 317 |
if num_comments != -1: |
| 317 | 318 |
any_entry_added = True |
| 318 | 319 |
yaml_item['comment_times'].append( timecode_now ) |
| ... | ... |
@@ -349,35 +350,35 @@ def Get_num_comments( url_string ): |
| 349 | 350 |
|
| 350 | 351 |
def Get_cookie( cookie_request ): |
| 351 | 352 |
cookie = cookielib.CookieJar() |
| 353 |
+ error_string = "Get_cookie didn't." |
|
| 352 | 354 |
try: |
| 353 | 355 |
cookie_response = urllib2.urlopen( cookie_request ) |
| 354 | 356 |
cookie.extract_cookies( cookie_response, cookie_request ) |
| 355 | 357 |
return cookie |
| 356 | 358 |
except urllib2.URLError, e: |
| 357 | 359 |
if hasattr( e, 'reason' ): |
| 358 |
- print "Get_cookie got an error:", e.reason |
|
| 360 |
+ error_string = "Get_cookie got an error: %s" % ( str( e.reason ) ) |
|
| 359 | 361 |
elif hasattr( e, 'code' ): |
| 360 |
- print "Get_cookie got an error. Code:", e.code |
|
| 362 |
+ error_string = "Get_cookie got an error. Code: %s" % ( str( e.code ) ) |
|
| 363 |
+ print error_string |
|
| 361 | 364 |
return None |
| 362 | 365 |
|
| 363 |
-def Get_num_disqus_comments( url_string, cookie ): |
|
| 364 |
- |
|
| 365 |
- if cookie == None: |
|
| 366 |
- return -1 |
|
| 367 |
- |
|
| 366 |
+def Get_disqus_id( yaml_item ): |
|
| 367 |
+ if 'disqus_id' in yaml_item: |
|
| 368 |
+ return yaml_item['disqus_id'] |
|
| 369 |
+ url_get_data = '' |
|
| 368 | 370 |
try: |
| 369 |
- f = urllib2.urlopen( url_string ) |
|
| 371 |
+ f = urllib2.urlopen( yaml_item['link'] ) |
|
| 370 | 372 |
data = f.read() |
| 371 | 373 |
f.close() |
| 372 | 374 |
except urllib2.URLError, e: |
| 373 | 375 |
if hasattr( e, 'reason' ): |
| 374 |
- print "Get_num_disqus_comments got an error:", e.reason |
|
| 376 |
+ print "Get_disqus_id got an error:", e.reason |
|
| 375 | 377 |
elif hasattr( e, 'code' ): |
| 376 |
- print "Get_num_disqus_comments got an error. Code:", e.code |
|
| 377 |
- return -1 |
|
| 378 |
+ print "Get_disqus_id got an error. Code:", e.code |
|
| 379 |
+ return url_get_data |
|
| 378 | 380 |
|
| 379 | 381 |
tag_to_find = '<a href="#comments" rel="nofollow"><span class="dsq-postid" rel="' |
| 380 |
- disqus_tag_to_find = 'displayCount('
|
|
| 381 | 382 |
offset = data.find( tag_to_find ) |
| 382 | 383 |
if offset != -1: |
| 383 | 384 |
start_pos = offset + len( tag_to_find ) |
| ... | ... |
@@ -385,9 +386,19 @@ def Get_num_disqus_comments( url_string, cookie ): |
| 385 | 386 |
while data[ end_pos ] != '"' and end_pos < start_pos + 200: |
| 386 | 387 |
end_pos += 1 |
| 387 | 388 |
if end_pos < start_pos + 200: |
| 389 |
+ url_get_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' ) |
|
| 390 |
+ yaml_item['disqus_id'] = url_get_data |
|
| 391 |
+# else: |
|
| 392 |
+# print "Get_disqus_id could not find #comments anchor for", yaml_item['link'] |
|
| 393 |
+ return url_get_data |
|
| 394 |
+ |
|
| 395 |
+def Get_num_disqus_comments( url_string, disqus_id, cookie ): |
|
| 396 |
+ |
|
| 397 |
+ if cookie == None or disqus_id == '': |
|
| 398 |
+ return -1 |
|
| 399 |
+ |
|
| 388 | 400 |
opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cookie ) ) |
| 389 |
- url_GET_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' ) |
|
| 390 |
- request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + url_GET_data ) |
|
| 401 |
+ request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + disqus_id ) |
|
| 391 | 402 |
try: |
| 392 | 403 |
response = opener.open( request ) |
| 393 | 404 |
disqus_data = response.read() |
| ... | ... |
@@ -396,14 +407,15 @@ def Get_num_disqus_comments( url_string, cookie ): |
| 396 | 407 |
print "Get_num_disqus_comments got an error getting the count:", e.reason |
| 397 | 408 |
elif hasattr( e, 'code' ): |
| 398 | 409 |
print "Get_num_disqus_comments got an error getting the count. Code:", e.code |
| 399 |
- disqus_data = "" |
|
| 410 |
+ disqus_tag_to_find = 'displayCount('
|
|
| 400 | 411 |
disqus_offset = disqus_data.find( disqus_tag_to_find ) |
| 401 | 412 |
if disqus_offset != -1: |
| 402 | 413 |
start_pos = disqus_offset + len( disqus_tag_to_find ) |
| 403 | 414 |
end_pos = disqus_data.find( '}]})', start_pos ) |
| 404 | 415 |
if end_pos != -1: |
| 405 | 416 |
return int( json.loads( disqus_data[start_pos:end_pos+3] )['counts'][0]['comments'] ) |
| 406 |
- |
|
| 417 |
+ else: |
|
| 418 |
+ print "Get_num_disqus_comments found no disqus tag for", url_string |
|
| 407 | 419 |
return -1 |
| 408 | 420 |
|
| 409 | 421 |
def Get_num_retweets( url_string ): |
| 410 | 422 |