David Blume committed on 2018-01-21 14:55:04
Showing 2 changed files, with 28 additions and 63 deletions.
... | ... |
@@ -7,7 +7,11 @@ and decide which articles to include in its own feed. |
7 | 7 |
|
8 | 8 |
Here's a [blog post about it](http://david.dlma.com/blog/my-techcrunch-feed-filter). |
9 | 9 |
|
10 |
-# History |
|
10 |
+# To Do |
|
11 |
+ |
|
12 |
+* Maybe use Reddit upvotes |
|
13 |
+ |
|
14 |
+# Pre-Git History |
|
11 | 15 |
|
12 | 16 |
This was originally archived in a Subversion repo. I'd forgotten about the |
13 | 17 |
version control and had gotten into the habit of just modifying the production |
... | ... |
@@ -1,13 +1,8 @@ |
1 | 1 |
#!/usr/bin/env python |
2 | 2 |
# |
3 |
-# TODO: |
|
4 |
-# 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry »</a>' |
|
5 |
-# link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/" |
|
6 |
-# 2. Add Reddit counts: curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg" |
|
7 |
-# |
|
8 |
-# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
|
9 |
-# |
|
10 |
- |
|
3 |
+# Testing without affecting the yaml file and saving the updated one aside: |
|
4 |
+# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \ |
|
5 |
+# cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
|
11 | 6 |
import feedparser |
12 | 7 |
import yaml |
13 | 8 |
import sys |
... | ... |
@@ -32,6 +27,7 @@ import texttime |
32 | 27 |
import operator |
33 | 28 |
from datetime import timedelta |
34 | 29 |
import cgi |
30 |
+import smtp_creds # Your own credentials, used in send_email() |
|
35 | 31 |
|
36 | 32 |
debug = True |
37 | 33 |
any_entry_added = False |
... | ... |
@@ -102,10 +98,11 @@ def asciiize(s): |
102 | 98 |
return s |
103 | 99 |
|
104 | 100 |
|
105 |
-def sendEmail(subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@techcrunch.dlma.com>'): |
|
101 |
+def send_email(subject, message, toaddrs, |
|
102 |
+ fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)): |
|
106 | 103 |
""" Sends Email """ |
107 |
- smtp = smtplib.SMTP('mail.dlma.com', port=587) |
|
108 |
- smtp.login(user, passw) |
|
104 |
+ smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port) |
|
105 |
+ smtp.login(smtp_creds.user, smtp_creds.passw) |
|
109 | 106 |
smtp.sendmail(fromaddr, \ |
110 | 107 |
toaddrs, \ |
111 | 108 |
"Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \ |
... | ... |
@@ -125,8 +122,6 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
125 | 122 |
# lhs_times, lhs_values = zip(*comments) |
126 | 123 |
# rhs_times, rhs_values = zip(*rhs) |
127 | 124 |
|
128 |
- # TODO handle failure cases, -1 |
|
129 |
- |
|
130 | 125 |
if not len(lhs_times): |
131 | 126 |
lhs_times = [time_posted,] |
132 | 127 |
if not len(lhs_values): |
... | ... |
@@ -136,8 +131,6 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
136 | 131 |
if not len(rhs_values): |
137 | 132 |
rhs_values = [0,] |
138 | 133 |
|
139 |
-# lhs_times = [(i - time_posted + 900) / 1800 for i in lhs_times] |
|
140 |
-# rhs_times = [(i - time_posted + 900) / 1800 for i in rhs_times] |
|
141 | 134 |
lhs_times = [(i - time_posted) / 1800 for i in lhs_times] |
142 | 135 |
rhs_times = [(i - time_posted) / 1800 for i in rhs_times] |
143 | 136 |
|
... | ... |
@@ -212,11 +205,9 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
212 | 205 |
|
213 | 206 |
|
214 | 207 |
def process_feed(yaml_items): |
215 |
- """ |
|
216 |
- Retrieve the url and process it. |
|
208 |
+ """Retrieve the url and process it. |
|
217 | 209 |
feed_info (in, out) A tuple that describes an individual feed, like its name and etag. |
218 | 210 |
""" |
219 |
- |
|
220 | 211 |
feed = feedparser.parse('http://feeds.feedburner.com/TechCrunch') |
221 | 212 |
if hasattr(feed, 'status'): |
222 | 213 |
if feed.status == 304: |
... | ... |
@@ -399,22 +390,7 @@ def Get_reddit_stats(url_string): |
399 | 390 |
|
400 | 391 |
|
401 | 392 |
def Get_fb_stats(url_string): |
402 |
- """ There are apparently two pretty good ways to do this. One, with FQL, querying for the parameters you want, |
|
403 |
- and two, with URL id. They go like this: |
|
404 |
- |
|
405 |
- FQL: |
|
406 |
- |
|
407 |
- u = urllib.quote_plus(url_string) |
|
408 |
- urllib2.urlopen('https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27' % (u)) |
|
409 |
- |
|
410 |
- URL ID: |
|
411 |
- |
|
412 |
- u = urllib.quote_plus(url_string) |
|
413 |
- with open('facebook-token.txt', 'r') as f: |
|
414 |
- token = f.read() |
|
415 |
- encoded = urllib.urlencode({'access_token': token}) |
|
416 |
- urllib2.urlopen('https://graph.facebook.com/vX.Y/?id=%s&%s' % (u, encoded) |
|
417 |
- """ |
|
393 |
+ """Use graph's "engagement" field to get reactions and shares.""" |
|
418 | 394 |
shares = -1 |
419 | 395 |
comments = -1 |
420 | 396 |
likes = -1 |
... | ... |
@@ -423,21 +399,7 @@ def Get_fb_stats(url_string): |
423 | 399 |
|
424 | 400 |
try: |
425 | 401 |
encoded = urllib.urlencode({'access_token': facebook_token}) |
426 |
-# url = 'https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27&%s' |
|
427 |
- # This stopped working 2018-01-13, 11:25, after I told Facebook the app would use v2.11 |
|
428 |
- # https://developers.facebook.com/docs/graph-api/changelog/version2.9#gapi-deprecate |
|
429 |
- # url = 'https://graph.facebook.com/v2.8/?id=%s&fields=og_object{engagement},share&%s' |
|
430 |
- |
|
431 |
- # Consider the following for a different engagement field: |
|
432 |
- # "engagement": { |
|
433 |
- # "reaction_count": 115, |
|
434 |
- # "comment_count": 0, |
|
435 |
- # "share_count": 102, |
|
436 |
- # "comment_plugin_count": 0 |
|
437 |
- # }, |
|
438 |
- # Where reaction_count + share_count = og_object.engagement.count |
|
439 | 402 |
url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s' |
440 |
- |
|
441 | 403 |
f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded)) |
442 | 404 |
data = f.read() |
443 | 405 |
f.close() |
... | ... |
@@ -486,18 +448,18 @@ def Get_fb_stats(url_string): |
486 | 448 |
return shares, comments, likes |
487 | 449 |
|
488 | 450 |
|
489 |
-def Save_image(url_string, file_path): |
|
451 |
+def save_image(url_string, file_path): |
|
490 | 452 |
try: |
491 | 453 |
f = urllib2.urlopen(url_string) |
492 | 454 |
data = f.read() |
493 | 455 |
f.close() |
494 | 456 |
except (urllib2.URLError, httplib.BadStatusLine) as e: |
495 | 457 |
if hasattr(e, 'reason'): # URLError |
496 |
- print "Save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason |
|
458 |
+ print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason |
|
497 | 459 |
elif hasattr(e, 'code'): # URLError |
498 |
- print "Save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code |
|
460 |
+ print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code |
|
499 | 461 |
else: |
500 |
- print "Save_image: Error from urlopen", e |
|
462 |
+ print "save_image: Error from urlopen", e |
|
501 | 463 |
return url_string |
502 | 464 |
|
503 | 465 |
if len(data) > 50: |
... | ... |
@@ -507,7 +469,8 @@ def Save_image(url_string, file_path): |
507 | 469 |
return url_string |
508 | 470 |
|
509 | 471 |
|
510 |
-def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
|
472 |
+def make_index_html(yaml_items, weekend_stats, weekday_stats): |
|
473 |
+ """Writes a static index.html file from the YAML items.""" |
|
511 | 474 |
cur_time = int(time.time()) |
512 | 475 |
new_index_fullpath = os.path.join(localdir, 'index.html_new') |
513 | 476 |
index_fullpath = os.path.join(localdir, 'index.html') |
... | ... |
@@ -541,10 +502,7 @@ def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
541 | 502 |
image_index % 2, |
542 | 503 |
tag_hit |
543 | 504 |
) |
544 |
-# if image['title'].startswith( 'Too ' ): |
|
545 |
-# print image['title'], image['qualified'], image['rhs_times'] |
|
546 |
-# print chart_url |
|
547 |
- image_url = Save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
|
505 |
+ image_url = save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
|
548 | 506 |
f.write('<tr valign="center" class="%s">\n <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \ |
549 | 507 |
(image_index % 2 and "even" or "odd", |
550 | 508 |
image['link'], |
... | ... |
@@ -568,7 +526,8 @@ def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
568 | 526 |
os.unlink(fname) |
569 | 527 |
|
570 | 528 |
|
571 |
-def Make_feed_file(yaml_items): |
|
529 |
+def make_feed_file(yaml_items): |
|
530 |
+ """Writes the RSS feed file with the YAML items.""" |
|
572 | 531 |
with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f: |
573 | 532 |
f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\">\n<channel>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>") |
574 | 533 |
f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))) |
... | ... |
@@ -631,7 +590,7 @@ if __name__=='__main__': |
631 | 590 |
print "could not open", yaml_fullpath |
632 | 591 |
items = [] |
633 | 592 |
|
634 |
- with open('facebook-token.txt', 'r') as f: |
|
593 |
+ with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f: |
|
635 | 594 |
facebook_token = f.read() |
636 | 595 |
|
637 | 596 |
progress_text = ["read techcrunch.yaml"] |
... | ... |
@@ -688,9 +647,9 @@ if __name__=='__main__': |
688 | 647 |
with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f: |
689 | 648 |
yaml.dump(items, f, encoding='utf-8', width=120) |
690 | 649 |
|
691 |
- Make_feed_file(items) |
|
650 |
+ make_feed_file(items) |
|
692 | 651 |
|
693 |
- Make_index_html(items, weekend_stats, weekday_stats) |
|
652 |
+ make_index_html(items, weekend_stats, weekday_stats) |
|
694 | 653 |
else: |
695 | 654 |
print "No entries were added this time." |
696 | 655 |
|
... | ... |
@@ -699,7 +658,7 @@ if __name__=='__main__': |
699 | 658 |
print exceptional_text, ' '.join(progress_text) |
700 | 659 |
traceback.print_exc(file=sys.stdout) |
701 | 660 |
try: |
702 |
- sendEmail('Exception thrown in techcrunch.py', |
|
661 |
+ send_email('Exception thrown in techcrunch.py', |
|
703 | 662 |
exceptional_text + "\n" + traceback.format_exc(), |
704 | 663 |
('david.blume@gmail.com',)) |
705 | 664 |
except Exception as e: |
706 | 665 |