David Blume committed on 2018-01-21 14:55:04
Showing 2 changed files, with 28 additions and 63 deletions.
| ... | ... |
@@ -7,7 +7,11 @@ and decide which articles to include in its own feed. |
| 7 | 7 |
|
| 8 | 8 |
Here's a [blog post about it](http://david.dlma.com/blog/my-techcrunch-feed-filter). |
| 9 | 9 |
|
| 10 |
-# History |
|
| 10 |
+# To Do |
|
| 11 |
+ |
|
| 12 |
+* Maybe use Reddit upvotes |
|
| 13 |
+ |
|
| 14 |
+# Pre-Git History |
|
| 11 | 15 |
|
| 12 | 16 |
This was originally archived in a Subversion repo. I'd forgotten about the |
| 13 | 17 |
version control and had gotten into the habit of just modifying the production |
| ... | ... |
@@ -1,13 +1,8 @@ |
| 1 | 1 |
#!/usr/bin/env python |
| 2 | 2 |
# |
| 3 |
-# TODO: |
|
| 4 |
-# 1. Deep links: '<a class="post_more_link snap_nopreview" href="http://www.crunchgear.com/2010/08/21/fanboyism-when-expression-meets-desperation/">Read the rest of this entry »</a>' |
|
| 5 |
-# link = "http://techcrunch.com/2010/08/21/fanboyism-when-expression-meets-desperation/" |
|
| 6 |
-# 2. Add Reddit counts: curl "http://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg" |
|
| 7 |
-# |
|
| 8 |
-# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
|
| 9 |
-# |
|
| 10 |
- |
|
| 3 |
+# Testing without affecting the yaml file and saving the updated one aside: |
|
| 4 |
+# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \ |
|
| 5 |
+# cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml |
|
| 11 | 6 |
import feedparser |
| 12 | 7 |
import yaml |
| 13 | 8 |
import sys |
| ... | ... |
@@ -32,6 +27,7 @@ import texttime |
| 32 | 27 |
import operator |
| 33 | 28 |
from datetime import timedelta |
| 34 | 29 |
import cgi |
| 30 |
+import smtp_creds # Your own credentials, used in send_email() |
|
| 35 | 31 |
|
| 36 | 32 |
debug = True |
| 37 | 33 |
any_entry_added = False |
| ... | ... |
@@ -102,10 +98,11 @@ def asciiize(s): |
| 102 | 98 |
return s |
| 103 | 99 |
|
| 104 | 100 |
|
| 105 |
-def sendEmail(subject, message, toaddrs, fromaddr='"techcrunch.py" <techcrunch@techcrunch.dlma.com>'): |
|
| 101 |
+def send_email(subject, message, toaddrs, |
|
| 102 |
+ fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)): |
|
| 106 | 103 |
""" Sends Email """ |
| 107 |
- smtp = smtplib.SMTP('mail.dlma.com', port=587)
|
|
| 108 |
- smtp.login(user, passw) |
|
| 104 |
+ smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port) |
|
| 105 |
+ smtp.login(smtp_creds.user, smtp_creds.passw) |
|
| 109 | 106 |
smtp.sendmail(fromaddr, \ |
| 110 | 107 |
toaddrs, \ |
| 111 | 108 |
"Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \ |
| ... | ... |
@@ -125,8 +122,6 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
| 125 | 122 |
# lhs_times, lhs_values = zip(*comments) |
| 126 | 123 |
# rhs_times, rhs_values = zip(*rhs) |
| 127 | 124 |
|
| 128 |
- # TODO handle failure cases, -1 |
|
| 129 |
- |
|
| 130 | 125 |
if not len(lhs_times): |
| 131 | 126 |
lhs_times = [time_posted,] |
| 132 | 127 |
if not len(lhs_values): |
| ... | ... |
@@ -136,8 +131,6 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
| 136 | 131 |
if not len(rhs_values): |
| 137 | 132 |
rhs_values = [0,] |
| 138 | 133 |
|
| 139 |
-# lhs_times = [(i - time_posted + 900) / 1800 for i in lhs_times] |
|
| 140 |
-# rhs_times = [(i - time_posted + 900) / 1800 for i in rhs_times] |
|
| 141 | 134 |
lhs_times = [(i - time_posted) / 1800 for i in lhs_times] |
| 142 | 135 |
rhs_times = [(i - time_posted) / 1800 for i in rhs_times] |
| 143 | 136 |
|
| ... | ... |
@@ -212,11 +205,9 @@ def make_chart_url(time_posted, lhs_times, lhs_values, rhs_times, |
| 212 | 205 |
|
| 213 | 206 |
|
| 214 | 207 |
def process_feed(yaml_items): |
| 215 |
- """ |
|
| 216 |
- Retrieve the url and process it. |
|
| 208 |
+ """Retrieve the url and process it. |
|
| 217 | 209 |
feed_info (in, out) A tuple that describes an individual feed, like its name and etag. |
| 218 | 210 |
""" |
| 219 |
- |
|
| 220 | 211 |
feed = feedparser.parse('http://feeds.feedburner.com/TechCrunch')
|
| 221 | 212 |
if hasattr(feed, 'status'): |
| 222 | 213 |
if feed.status == 304: |
| ... | ... |
@@ -399,22 +390,7 @@ def Get_reddit_stats(url_string): |
| 399 | 390 |
|
| 400 | 391 |
|
| 401 | 392 |
def Get_fb_stats(url_string): |
| 402 |
- """ There are apparently two pretty good ways to do this. One, with FQL, querying for the parameters you want, |
|
| 403 |
- and two, with URL id. They go like this: |
|
| 404 |
- |
|
| 405 |
- FQL: |
|
| 406 |
- |
|
| 407 |
- u = urllib.quote_plus(url_string) |
|
| 408 |
- urllib2.urlopen('https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27' % (u))
|
|
| 409 |
- |
|
| 410 |
- URL ID: |
|
| 411 |
- |
|
| 412 |
- u = urllib.quote_plus(url_string) |
|
| 413 |
- with open('facebook-token.txt', 'r') as f:
|
|
| 414 |
- token = f.read() |
|
| 415 |
- encoded = urllib.urlencode({'access_token': token})
|
|
| 416 |
- urllib2.urlopen('https://graph.facebook.com/vX.Y/?id=%s&%s' % (u, encoded)
|
|
| 417 |
- """ |
|
| 393 |
+ """Use graph's "engagement" field to get reactions and shares.""" |
|
| 418 | 394 |
shares = -1 |
| 419 | 395 |
comments = -1 |
| 420 | 396 |
likes = -1 |
| ... | ... |
@@ -423,21 +399,7 @@ def Get_fb_stats(url_string): |
| 423 | 399 |
|
| 424 | 400 |
try: |
| 425 | 401 |
encoded = urllib.urlencode({'access_token': facebook_token})
|
| 426 |
-# url = 'https://graph.facebook.com/fql?q=SELECT%%20total_count,comment_count,like_count,share_count%%20FROM%%20link_stat%%20WHERE%%20url=%%27%s%%27&%s' |
|
| 427 |
- # This stopped working 2018-01-13, 11:25, after I told Facebook the app would use v2.11 |
|
| 428 |
- # https://developers.facebook.com/docs/graph-api/changelog/version2.9#gapi-deprecate |
|
| 429 |
- # url = 'https://graph.facebook.com/v2.8/?id=%s&fields=og_object{engagement},share&%s'
|
|
| 430 |
- |
|
| 431 |
- # Consider the following for a different engagement field: |
|
| 432 |
- # "engagement": {
|
|
| 433 |
- # "reaction_count": 115, |
|
| 434 |
- # "comment_count": 0, |
|
| 435 |
- # "share_count": 102, |
|
| 436 |
- # "comment_plugin_count": 0 |
|
| 437 |
- # }, |
|
| 438 |
- # Where reaction_count + share_count = og_object.engagement.count |
|
| 439 | 402 |
url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s' |
| 440 |
- |
|
| 441 | 403 |
f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded)) |
| 442 | 404 |
data = f.read() |
| 443 | 405 |
f.close() |
| ... | ... |
@@ -486,18 +448,18 @@ def Get_fb_stats(url_string): |
| 486 | 448 |
return shares, comments, likes |
| 487 | 449 |
|
| 488 | 450 |
|
| 489 |
-def Save_image(url_string, file_path): |
|
| 451 |
+def save_image(url_string, file_path): |
|
| 490 | 452 |
try: |
| 491 | 453 |
f = urllib2.urlopen(url_string) |
| 492 | 454 |
data = f.read() |
| 493 | 455 |
f.close() |
| 494 | 456 |
except (urllib2.URLError, httplib.BadStatusLine) as e: |
| 495 | 457 |
if hasattr(e, 'reason'): # URLError |
| 496 |
- print "Save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason
|
|
| 458 |
+ print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Reason:", e.reason
|
|
| 497 | 459 |
elif hasattr(e, 'code'): # URLError |
| 498 |
- print "Save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code
|
|
| 460 |
+ print "save_image: Error attempting to create", file_path[file_path.rfind('/')+1:], "Code:", e.code
|
|
| 499 | 461 |
else: |
| 500 |
- print "Save_image: Error from urlopen", e |
|
| 462 |
+ print "save_image: Error from urlopen", e |
|
| 501 | 463 |
return url_string |
| 502 | 464 |
|
| 503 | 465 |
if len(data) > 50: |
| ... | ... |
@@ -507,7 +469,8 @@ def Save_image(url_string, file_path): |
| 507 | 469 |
return url_string |
| 508 | 470 |
|
| 509 | 471 |
|
| 510 |
-def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
|
| 472 |
+def make_index_html(yaml_items, weekend_stats, weekday_stats): |
|
| 473 |
+ """Writes a static index.html file from the YAML items.""" |
|
| 511 | 474 |
cur_time = int(time.time()) |
| 512 | 475 |
new_index_fullpath = os.path.join(localdir, 'index.html_new') |
| 513 | 476 |
index_fullpath = os.path.join(localdir, 'index.html') |
| ... | ... |
@@ -541,10 +502,7 @@ def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
| 541 | 502 |
image_index % 2, |
| 542 | 503 |
tag_hit |
| 543 | 504 |
) |
| 544 |
-# if image['title'].startswith( 'Too ' ): |
|
| 545 |
-# print image['title'], image['qualified'], image['rhs_times'] |
|
| 546 |
-# print chart_url |
|
| 547 |
- image_url = Save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
|
| 505 |
+ image_url = save_image(chart_url, os.path.join(cache_path, '%d_%d.png' % (cur_time, image_index))) |
|
| 548 | 506 |
f.write('<tr valign="center" class="%s">\n <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' % \
|
| 549 | 507 |
(image_index % 2 and "even" or "odd", |
| 550 | 508 |
image['link'], |
| ... | ... |
@@ -568,7 +526,8 @@ def Make_index_html(yaml_items, weekend_stats, weekday_stats): |
| 568 | 526 |
os.unlink(fname) |
| 569 | 527 |
|
| 570 | 528 |
|
| 571 |
-def Make_feed_file(yaml_items): |
|
| 529 |
+def make_feed_file(yaml_items): |
|
| 530 |
+ """Writes the RSS feed file with the YAML items.""" |
|
| 572 | 531 |
with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f: |
| 573 | 532 |
f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\">\n<channel>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>")
|
| 574 | 533 |
f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
|
| ... | ... |
@@ -631,7 +590,7 @@ if __name__=='__main__': |
| 631 | 590 |
print "could not open", yaml_fullpath |
| 632 | 591 |
items = [] |
| 633 | 592 |
|
| 634 |
- with open('facebook-token.txt', 'r') as f:
|
|
| 593 |
+ with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f: |
|
| 635 | 594 |
facebook_token = f.read() |
| 636 | 595 |
|
| 637 | 596 |
progress_text = ["read techcrunch.yaml"] |
| ... | ... |
@@ -688,9 +647,9 @@ if __name__=='__main__': |
| 688 | 647 |
with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f: |
| 689 | 648 |
yaml.dump(items, f, encoding='utf-8', width=120) |
| 690 | 649 |
|
| 691 |
- Make_feed_file(items) |
|
| 650 |
+ make_feed_file(items) |
|
| 692 | 651 |
|
| 693 |
- Make_index_html(items, weekend_stats, weekday_stats) |
|
| 652 |
+ make_index_html(items, weekend_stats, weekday_stats) |
|
| 694 | 653 |
else: |
| 695 | 654 |
print "No entries were added this time." |
| 696 | 655 |
|
| ... | ... |
@@ -699,7 +658,7 @@ if __name__=='__main__': |
| 699 | 658 |
print exceptional_text, ' '.join(progress_text) |
| 700 | 659 |
traceback.print_exc(file=sys.stdout) |
| 701 | 660 |
try: |
| 702 |
- sendEmail('Exception thrown in techcrunch.py',
|
|
| 661 |
+ send_email('Exception thrown in techcrunch.py',
|
|
| 703 | 662 |
exceptional_text + "\n" + traceback.format_exc(), |
| 704 | 663 |
('david.blume@gmail.com',))
|
| 705 | 664 |
except Exception as e: |
| 706 | 665 |