David Blume committed on 2018-01-20 20:14:24
Showing 1 changed file, with 29 additions and 17 deletions.
... | ... |
@@ -312,7 +312,8 @@ def process_yaml_item( yaml_item, cookie ): |
312 | 312 |
|
313 | 313 |
timecode_now = int( time.time() ) |
314 | 314 |
if len( yaml_item['comments'] ) < 8: |
315 |
- num_comments = Get_num_disqus_comments( yaml_item['link'], cookie ) |
|
315 |
+ disqus_id = Get_disqus_id( yaml_item ) |
|
316 |
+ num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie ) |
|
316 | 317 |
if num_comments != -1: |
317 | 318 |
any_entry_added = True |
318 | 319 |
yaml_item['comment_times'].append( timecode_now ) |
... | ... |
@@ -349,35 +350,35 @@ def Get_num_comments( url_string ): |
349 | 350 |
|
350 | 351 |
def Get_cookie( cookie_request ): |
351 | 352 |
cookie = cookielib.CookieJar() |
353 |
+ error_string = "Get_cookie didn't." |
|
352 | 354 |
try: |
353 | 355 |
cookie_response = urllib2.urlopen( cookie_request ) |
354 | 356 |
cookie.extract_cookies( cookie_response, cookie_request ) |
355 | 357 |
return cookie |
356 | 358 |
except urllib2.URLError, e: |
357 | 359 |
if hasattr( e, 'reason' ): |
358 |
- print "Get_cookie got an error:", e.reason |
|
360 |
+ error_string = "Get_cookie got an error: %s" % ( str( e.reason ) ) |
|
359 | 361 |
elif hasattr( e, 'code' ): |
360 |
- print "Get_cookie got an error. Code:", e.code |
|
362 |
+ error_string = "Get_cookie got an error. Code: %s" % ( str( e.code ) ) |
|
363 |
+ print error_string |
|
361 | 364 |
return None |
362 | 365 |
|
363 |
-def Get_num_disqus_comments( url_string, cookie ): |
|
364 |
- |
|
365 |
- if cookie == None: |
|
366 |
- return -1 |
|
367 |
- |
|
366 |
+def Get_disqus_id( yaml_item ): |
|
367 |
+ if 'disqus_id' in yaml_item: |
|
368 |
+ return yaml_item['disqus_id'] |
|
369 |
+ url_get_data = '' |
|
368 | 370 |
try: |
369 |
- f = urllib2.urlopen( url_string ) |
|
371 |
+ f = urllib2.urlopen( yaml_item['link'] ) |
|
370 | 372 |
data = f.read() |
371 | 373 |
f.close() |
372 | 374 |
except urllib2.URLError, e: |
373 | 375 |
if hasattr( e, 'reason' ): |
374 |
- print "Get_num_disqus_comments got an error:", e.reason |
|
376 |
+ print "Get_disqus_id got an error:", e.reason |
|
375 | 377 |
elif hasattr( e, 'code' ): |
376 |
- print "Get_num_disqus_comments got an error. Code:", e.code |
|
377 |
- return -1 |
|
378 |
+ print "Get_disqus_id got an error. Code:", e.code |
|
379 |
+ return url_get_data |
|
378 | 380 |
|
379 | 381 |
tag_to_find = '<a href="#comments" rel="nofollow"><span class="dsq-postid" rel="' |
380 |
- disqus_tag_to_find = 'displayCount(' |
|
381 | 382 |
offset = data.find( tag_to_find ) |
382 | 383 |
if offset != -1: |
383 | 384 |
start_pos = offset + len( tag_to_find ) |
... | ... |
@@ -385,9 +386,19 @@ def Get_num_disqus_comments( url_string, cookie ): |
385 | 386 |
while data[ end_pos ] != '"' and end_pos < start_pos + 200: |
386 | 387 |
end_pos += 1 |
387 | 388 |
if end_pos < start_pos + 200: |
389 |
+ url_get_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' ) |
|
390 |
+ yaml_item['disqus_id'] = url_get_data |
|
391 |
+# else: |
|
392 |
+# print "Get_disqus_id could not find #comments anchor for", yaml_item['link'] |
|
393 |
+ return url_get_data |
|
394 |
+ |
|
395 |
+def Get_num_disqus_comments( url_string, disqus_id, cookie ): |
|
396 |
+ |
|
397 |
+ if cookie == None or disqus_id == '': |
|
398 |
+ return -1 |
|
399 |
+ |
|
388 | 400 |
opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cookie ) ) |
389 |
- url_GET_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' ) |
|
390 |
- request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + url_GET_data ) |
|
401 |
+ request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + disqus_id ) |
|
391 | 402 |
try: |
392 | 403 |
response = opener.open( request ) |
393 | 404 |
disqus_data = response.read() |
... | ... |
@@ -396,14 +407,15 @@ def Get_num_disqus_comments( url_string, cookie ): |
396 | 407 |
print "Get_num_disqus_comments got an error getting the count:", e.reason |
397 | 408 |
elif hasattr( e, 'code' ): |
398 | 409 |
print "Get_num_disqus_comments got an error getting the count. Code:", e.code |
399 |
- disqus_data = "" |
|
410 |
+ disqus_tag_to_find = 'displayCount(' |
|
400 | 411 |
disqus_offset = disqus_data.find( disqus_tag_to_find ) |
401 | 412 |
if disqus_offset != -1: |
402 | 413 |
start_pos = disqus_offset + len( disqus_tag_to_find ) |
403 | 414 |
end_pos = disqus_data.find( '}]})', start_pos ) |
404 | 415 |
if end_pos != -1: |
405 | 416 |
return int( json.loads( disqus_data[start_pos:end_pos+3] )['counts'][0]['comments'] ) |
406 |
- |
|
417 |
+ else: |
|
418 |
+ print "Get_num_disqus_comments found no disqus tag for", url_string |
|
407 | 419 |
return -1 |
408 | 420 |
|
409 | 421 |
def Get_num_retweets( url_string ): |
410 | 422 |