2010-09-03: Save off the disqus identifier for use later.
David Blume

David Blume committed on 2018-01-20 20:14:24
Showing 1 changed file, with 29 additions and 17 deletions.

... ...
@@ -312,7 +312,8 @@ def process_yaml_item( yaml_item, cookie ):
312 312
 
313 313
     timecode_now = int( time.time() )
314 314
     if len( yaml_item['comments'] ) < 8:
315
-        num_comments = Get_num_disqus_comments( yaml_item['link'], cookie )
315
+        disqus_id = Get_disqus_id( yaml_item )
316
+        num_comments = Get_num_disqus_comments( yaml_item['link'], disqus_id, cookie )
316 317
         if num_comments != -1:
317 318
             any_entry_added = True
318 319
             yaml_item['comment_times'].append( timecode_now )
... ...
@@ -349,35 +350,35 @@ def Get_num_comments( url_string ):
349 350
 
350 351
 def Get_cookie( cookie_request ):
351 352
     cookie = cookielib.CookieJar()
353
+    error_string = "Get_cookie didn't."
352 354
     try:
353 355
         cookie_response = urllib2.urlopen( cookie_request )
354 356
         cookie.extract_cookies( cookie_response, cookie_request )
355 357
         return cookie
356 358
     except urllib2.URLError, e:
357 359
         if hasattr( e, 'reason' ):
358
-            print "Get_cookie got an error:", e.reason
360
+            error_string = "Get_cookie got an error: %s" % ( str( e.reason ) )
359 361
     elif hasattr( e, 'code' ):
360
-            print "Get_cookie got an error. Code:", e.code
362
+            error_string = "Get_cookie got an error. Code: %s" % ( str( e.code ) )
363
+    print error_string
361 364
     return None
362 365
 
363
-def Get_num_disqus_comments( url_string, cookie ):
364
-
365
-    if cookie == None:
366
-        return -1
367
-
366
+def Get_disqus_id( yaml_item ):
367
+    if 'disqus_id' in yaml_item:
368
+        return yaml_item['disqus_id']
369
+    url_get_data = ''
368 370
     try:
369
-        f = urllib2.urlopen( url_string )
371
+        f = urllib2.urlopen( yaml_item['link'] )
370 372
         data = f.read()
371 373
         f.close()
372 374
     except urllib2.URLError, e:
373 375
         if hasattr( e, 'reason' ):
374
-            print "Get_num_disqus_comments got an error:", e.reason
376
+            print "Get_disqus_id got an error:", e.reason
375 377
     elif hasattr( e, 'code' ):
376
-            print "Get_num_disqus_comments got an error. Code:", e.code
377
-        return -1
378
+            print "Get_disqus_id got an error. Code:", e.code
379
+        return url_get_data
378 380
 
379 381
     tag_to_find = '<a href="#comments" rel="nofollow"><span class="dsq-postid" rel="'
380
-    disqus_tag_to_find = 'displayCount('
381 382
     offset = data.find( tag_to_find )
382 383
     if offset != -1:
383 384
         start_pos = offset + len( tag_to_find )
... ...
@@ -385,9 +386,19 @@ def Get_num_disqus_comments( url_string, cookie ):
385 386
         while data[ end_pos ] != '"' and end_pos < start_pos + 200:
386 387
             end_pos += 1
387 388
         if end_pos < start_pos + 200:
389
+        url_get_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' )
390
+        yaml_item['disqus_id'] = url_get_data
391
+#    else:
392
+#        print "Get_disqus_id could not find #comments anchor for", yaml_item['link']
393
+    return url_get_data
394
+
395
+def Get_num_disqus_comments( url_string, disqus_id, cookie ):
396
+
397
+    if cookie == None or disqus_id == '':
398
+        return -1
399
+
388 400
     opener = urllib2.build_opener( urllib2.HTTPCookieProcessor( cookie ) )
389
-            url_GET_data = urllib.quote_plus( data[start_pos:end_pos] ).replace( '+', '%20' )
390
-            request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + url_GET_data )
401
+    request = urllib2.Request( 'http://disqus.com/forums/techcrunch/count.js?q=1&0=1,' + disqus_id )
391 402
     try:
392 403
         response = opener.open( request )
393 404
         disqus_data = response.read()
... ...
@@ -396,14 +407,15 @@ def Get_num_disqus_comments( url_string, cookie ):
396 407
             print "Get_num_disqus_comments got an error getting the count:", e.reason
397 408
         elif hasattr( e, 'code' ):
398 409
             print "Get_num_disqus_comments got an error getting the count. Code:", e.code
399
-        disqus_data = ""
410
+    disqus_tag_to_find = 'displayCount('
400 411
     disqus_offset = disqus_data.find( disqus_tag_to_find )
401 412
     if disqus_offset != -1:
402 413
         start_pos = disqus_offset + len( disqus_tag_to_find )
403 414
         end_pos = disqus_data.find( '}]})', start_pos )
404 415
         if end_pos != -1:
405 416
             return int( json.loads( disqus_data[start_pos:end_pos+3] )['counts'][0]['comments'] )
406
-
417
+    else:
418
+        print "Get_num_disqus_comments found no disqus tag for", url_string
407 419
     return -1
408 420
 
409 421
 def Get_num_retweets( url_string ):
410 422