David Blume committed on 2018-01-21 14:55:04
Showing 2 changed files, with 28 additions and 63 deletions.
| ... | ... |
@@ -7,7 +7,11 @@ and decide which articles to include in its own feed. |
| 7 | 7 |
|
| 8 | 8 |
Here's a [blog post about it](http://david.dlma.com/blog/my-techcrunch-feed-filter). |
| 9 | 9 |
|
| 10 |
-# History |
|
| 10 |
+# To Do |
|
| 11 |
+ |
|
| 12 |
+* Maybe use Reddit upvotes |
|
| 13 |
+ |
|
| 14 |
+# Pre-Git History |
|
| 11 | 15 |
|
| 12 | 16 |
This was originally archived in a Subversion repo. I'd forgotten about the |
| 13 | 17 |
version control and had gotten into the habit of just modifying the production |
| ... | ... |
@@ -1,13 +1,8 @@ |
| 1 | 1 |
#!/usr/bin/env python |
| 2 | 2 |
# |
| 3 |
-# TODO: |
|
| 4 |
-# 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry »</a>' |
|
| 5 |
-# link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/" |
|
| 6 |
-# 2. Add Reddit counts: curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg" |
|
| 7 |
-# |
|
| 8 |
-# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
|
| 9 |
-# |
|
| 10 |
- |
|
| 3 |
+# Testing without affecting the yaml file and saving the updated one aside: |
|
| 4 |
+# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \ |
|
| 5 |
+# cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
|
| 11 | 6 |
import feedparser |
| 12 | 7 |
import yaml |
| 13 | 8 |
import sys |
| ... | ... |
@@ -32,6 +27,7 @@ import texttime |
| 32 | 27 |
import operator |
| 33 | 28 |
from datetime import timedelta |
| 34 | 29 |
import cgi |
| 30 |
+import smtp_creds # Your own credentials, used in send_email() |
|
| 35 | 31 |
|
| 36 | 32 |
debug = True |
| 37 | 33 |
any_entry_added = False |
| ... | ... |
@@ -102,10 +98,11 @@ def asciiize(s): |
| 102 | 98 |
return s |
| 103 | 99 |
|
| 104 | 100 |
|
| 105 |
-def sendEmail(subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@techcrunch.dlma.com>'): |
|
| 101 |
+def send_email(subject, message, toaddrs, |
|
| 102 |
+ fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)): |
|
| 106 | 103 |
""" Sends Email """ |
| 107 |
- smtp = smtplib.SMTP('mail.dlma.com', port=587)
|
|
| 108 |
- smtp.login(user, passw) |
|
| 104 |
+ smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port) |
|
| 105 |
+ smtp.login(smtp_creds.user, smtp_creds.passw) |
|
| 109 | 106 |
smtp.sendmail(fromaddr, \ |
| 110 | 107 |
toaddrs, \ |
| 111 | 108 |
"Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \ |
| ... | ... |
@@ -125,8 +122,6 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
| 125 | 122 |
# lhs_times, lhs_values = zip(*comments) |
| 126 | 123 |
# rhs_times, rhs_values = zip(*rhs) |
| 127 | 124 |
|
| 128 |
- # TODO handle failure cases, -1 |
|
| 129 |
- |
|
| 130 | 125 |
if not len(lhs_times): |
| 131 | 126 |
lhs_times = [time_posted,] |
| 132 | 127 |
if not len(lhs_values): |
| ... | ... |
@@ -136,8 +131,6 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
| 136 | 131 |
if not len(rhs_values): |
| 137 | 132 |
rhs_values = [0,] |
| 138 | 133 |
|
| 139 |
-# lhs_times = [(i - time_posted + 900) / 1800 for i in lhs_times] |
|
| 140 |
-# rhs_times = [(i - time_posted + 900) / 1800 for i in rhs_times] |
|
| 141 | 134 |
lhs_times = [(i - time_posted) / 1800 for i in lhs_times] |
| 142 | 135 |
rhs_times = [(i - time_posted) / 1800 for i in rhs_times] |
| 143 | 136 |
|
| ... | ... |
@@ -212,11 +205,9 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
| 212 | 205 |
|
| 213 | 206 |
|
| 214 | 207 |
def process_feed(yaml_items): |
| 215 |
- """ |
|
| 216 |
- Retrieve the url and process it. |
|
| 208 |
+ """Retrieve the url and process it. |
|
| 217 | 209 |
feed_info (in, out) A tuple that describes an individual feed, like its name and etag. |
| 218 | 210 |
""" |
| 219 |
- |
|
| 220 | 211 |
feed = feedparser.parse('http://feeds.feedburner.com/TechCrunch')
|
| 221 | 212 |
if hasattr(feed, 'status'): |
| 222 | 213 |
if feed.status == 304: |
| ... | ... |
@@ -399,22 +390,7 @@ def Get_reddit_stats(url_string): |
| 399 | 390 |
|
| 400 | 391 |
|
| 401 | 392 |
def Get_fb_stats(url_string): |
| 402 |
- """ There are apparently two pretty good ways to do this. One, with FQL, querying for the parameters you want, |
|
| 403 |
- and two, with URL id. They go like this: |
|
| 404 |
- |
|
| 405 |
- FQL: |
|
| 406 |
- |
|
| 407 |
- u = urllib.quote_plus(url_string) |
|
| 408 |
- urllib2.urlopen('https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27' % (u))
|
|
| 409 |
- |
|
| 410 |
- URL ID: |
|
| 411 |
- |
|
| 412 |
- u = urllib.quote_plus(url_string) |
|
| 413 |
- with open('facebook-token.txt', 'r') as f:
|
|
| 414 |
- token = f.read() |
|
| 415 |
- encoded = urllib.urlencode({'access_token': token})
|
|
| 416 |
- urllib2.urlopen('https://graph.facebook.com/vX.Y/?id=%s&%s' % (u, encoded)
|
|
| 417 |
- """ |
|
| 393 |
+ """Use graph's "engagement" field to get reactions and shares.""" |
|
| 418 | 394 |
shares = -1 |
| 419 | 395 |
comments = -1 |
| 420 | 396 |
likes = -1 |
| ... | ... |
@@ -423,21 +399,7 @@ def Get_fb_stats(url_string): |
| 423 | 399 |
|
| 424 | 400 |
try: |
| 425 | 401 |
encoded = urllib.urlencode({'access_token': facebook_token})
|
| 426 |
-# url = 'https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27&%s' |
|
| 427 |
- # This stopped working 2018-01-13, 11:25, after I told Facebook the app would use v2.11 |
|
| 428 |
- # https://developers.facebook.com/docs/graph-api/changelog/version2.9#gapi-deprecate |
|
| 429 |
- # url = 'https://graph.facebook.com/v2.8/?id=%s&fields=og_object{engagement},share&%s'
|
|
| 430 |
- |
|
| 431 |
- # Consider the following for a different engagement field: |
|
| 432 |
- # "engagement": {
|
|
| 433 |
- # "reaction_count": 115, |
|
| 434 |
- # "comment_count": 0, |
|
| 435 |
- # "share_count": 102, |
|
| 436 |
- # "comment_plugin_count": 0 |
|
| 437 |
- # }, |
|
| 438 |
- # Where reaction_count + share_count = og_object.engagement.count |
|
| 439 | 402 |
url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s' |
| 440 |
- |
|
| 441 | 403 |
f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded)) |
| 442 | 404 |
data = f.read() |
| 443 | 405 |
f.close() |
| ... | ... |
@@ -486,18 +448,18 @@ def Get_fb_stats(url_string): |
| 486 | 448 |
return shares, comments, likes |
| 487 | 449 |
|
| 488 | 450 |
|
| 489 |
-def Save_image(url_string, file_path): |
|
| 451 |
+def save_image(url_string, file_path): |
|
| 490 | 452 |
try: |
| 491 | 453 |
f = urllib2.urlopen(url_string) |
| 492 | 454 |
data = f.read() |
| 493 | 455 |
f.close() |
| 494 | 456 |
except (urllib2.URLError, httplib.BadStatusLine) as e: |
| 495 | 457 |
if hasattr(e, 'reason'): # URLError |
| 496 |
- print "Save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason
|
|
| 458 |
+ print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason
|
|
| 497 | 459 |
elif hasattr(e, 'code'): # URLError |
| 498 |
- print "Save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code
|
|
| 460 |
+ print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code
|
|
| 499 | 461 |
else: |
| 500 |
- print "Save_image: Error from urlopen", e |
|
| 462 |
+ print "save_image: Error from urlopen", e |
|
| 501 | 463 |
return url_string |
| 502 | 464 |
|
| 503 | 465 |
if len(data) > 50: |
| ... | ... |
@@ -507,7 +469,8 @@ def Save_image(url_string, file_path): |
| 507 | 469 |
return url_string |
| 508 | 470 |
|
| 509 | 471 |
|
| 510 |
-def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
|
| 472 |
+def make_index_html(yaml_items, weekend_stats, weekday_stats): |
|
| 473 |
+ """Writes a static index.html file from the YAML items.""" |
|
| 511 | 474 |
cur_time = int(time.time()) |
| 512 | 475 |
new_index_fullpath = os.path.join(localdir, 'index.html_new') |
| 513 | 476 |
index_fullpath = os.path.join(localdir, 'index.html') |
| ... | ... |
@@ -541,10 +502,7 @@ def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
| 541 | 502 |
image_index % 2, |
| 542 | 503 |
tag_hit |
| 543 | 504 |
) |
| 544 |
-# if image['title'].startswith( 'Too ' ): |
|
| 545 |
-# print image['title'], image['qualified'], image['rhs_times'] |
|
| 546 |
-# print chart_url |
|
| 547 |
- image_url = Save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
|
| 505 |
+ image_url = save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
|
| 548 | 506 |
f.write('<tr valign="center" class="%s">\n <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \
|
| 549 | 507 |
(image_index % 2 and "even" or "odd", |
| 550 | 508 |
image['link'], |
| ... | ... |
@@ -568,7 +526,8 @@ def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
| 568 | 526 |
os.unlink(fname) |
| 569 | 527 |
|
| 570 | 528 |
|
| 571 |
-def Make_feed_file(yaml_items): |
|
| 529 |
+def make_feed_file(yaml_items): |
|
| 530 |
+ """Writes the RSS feed file with the YAML items.""" |
|
| 572 | 531 |
with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f: |
| 573 | 532 |
f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\">\n<channel>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>")
|
| 574 | 533 |
f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
|
| ... | ... |
@@ -631,7 +590,7 @@ if __name__=='__main__': |
| 631 | 590 |
print "could not open", yaml_fullpath |
| 632 | 591 |
items = [] |
| 633 | 592 |
|
| 634 |
- with open('facebook-token.txt', 'r') as f:
|
|
| 593 |
+ with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f: |
|
| 635 | 594 |
facebook_token = f.read() |
| 636 | 595 |
|
| 637 | 596 |
progress_text = ["read techcrunch.yaml"] |
| ... | ... |
@@ -688,9 +647,9 @@ if __name__=='__main__': |
| 688 | 647 |
with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f: |
| 689 | 648 |
yaml.dump(items, f, encoding='utf-8', width=120) |
| 690 | 649 |
|
| 691 |
- Make_feed_file(items) |
|
| 650 |
+ make_feed_file(items) |
|
| 692 | 651 |
|
| 693 |
- Make_index_html(items, weekend_stats, weekday_stats) |
|
| 652 |
+ make_index_html(items, weekend_stats, weekday_stats) |
|
| 694 | 653 |
else: |
| 695 | 654 |
print "No entries were added this time." |
| 696 | 655 |
|
| ... | ... |
@@ -699,7 +658,7 @@ if __name__=='__main__': |
| 699 | 658 |
print exceptional_text, ' '.join(progress_text) |
| 700 | 659 |
traceback.print_exc(file=sys.stdout) |
| 701 | 660 |
try: |
| 702 |
- sendEmail('Exception thrown in techcrunch.py',
|
|
| 661 |
+ send_email('Exception thrown in techcrunch.py',
|
|
| 703 | 662 |
exceptional_text + "\n" + traceback.format_exc(), |
| 704 | 663 |
('david.blume@gmail.com',))
|
| 705 | 664 |
except Exception as e: |
| 706 | 665 |