39cf9b4240c9e16c0549f4956e0f047948f21acd
David Blume Catch up to production agai...

David Blume authored 6 years ago

1) #!/usr/bin/env python
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

2) #
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

3) # Testing without affecting the yaml file and saving the updated one aside:
4) # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
5) # cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

6) import feedparser
7) import yaml
8) import sys
9) import os
10) import time
11) import codecs
12) import traceback
13) import calendar
14) import pickle
15) import exceptions
16) import urllib
17) import urllib2
18) import httplib
19) import shutil
20) import smtplib
21) import analysis
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

22) import json
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

23) import xml
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

24) import operator
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

25) import cgi
David Blume Google terminated image cha...

David Blume authored 5 years ago

26) import cStringIO
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

27) import smtp_creds  # Your own credentials, used in send_email()
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

28) 
# Debug switch; nothing in the visible part of this file reads it.
debug = True
# Set to True by process_item() / process_yaml_item() whenever they record
# a new item or a new sample.
any_entry_added = False
# Lower-case tags and author names; make_index_html() marks an item as a
# "tag hit" when its author or one of its tags appears here.
tags_to_post = {'apple', 'google', 'roku'}
authors_to_post = ['michael arrington',]

# TODO 2018-01-18: Maybe combine fb_likes with fb_shares or something...
# Keys of a YAML item that supply the right-hand chart series (values and
# sample timestamps) passed to write_chart_data().
rhs_metric = 'fb_likes'
rhs_metric_times = 'comment_times'

# Directory joined onto the pickle and index.html paths; '' leaves the
# file names relative.
localdir = ''
39) 
David Blume Google terminated image cha...

David Blume authored 5 years ago

# Page header template. It is filled with a 3-tuple via the % operator:
#   1. background color of "tr.even" rows (RRGGBB, no leading '#')
#   2. background color of "tr.odd" rows (RRGGBB, no leading '#')
#   3. the JavaScript chart-drawing statements placed inside drawChart()
# NOTE(review): "border-collapse:separated" below is not a valid CSS value
# ("separate" is); it is left untouched because correcting it would change
# how the legend table currently renders.
html_head = """<!DOCTYPE html>
<html><head>
  <title>TechCrunch Feed Filter</title>
  <!-- <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://techcrunch.dlma.com/rss_feed.xml" /> -->
  <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://feeds.feedburner.com/TrendingAtTechcrunch" />
  <style type="text/css">
    body { font-family: "Arial", sans-serif; }
    .author { font-size: smaller; }
    .h3 { font-size: larger; }
    a { text-decoration: none; }
    /* table { border: none; border-collapse:collapse; font-size: large } */
    table { border-collapse: collapse; }
    table.legend { border:1px solid LightSlateGray; font-size: medium; border-collapse:separated; }
    table.legend th { border: 1px solid LightSlateGray; background-color: #E0E0E0; }
    table.legend td { border: 1px solid LightSlateGray; }
    tr.even { background:#%s; padding: 2em; }
    tr.odd { background:#%s; padding-bottom: 2em; }
    td div { height: 68px; }
  </style>
  <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
  <script type="text/javascript">
    google.charts.load('current', {'packages':['corechart']});
    google.charts.setOnLoadCallback(drawChart);
    function drawChart() {
      var options = {
        width:300,
        height:68,
        pointSize:0.1,
        dataOpacity:1.0,
        series: { 0: {targetAxisIndex:0}, 1: {targetAxisIndex:1, color:'limegreen'} },
        vAxis: { gridlines: {count: 0}, maxValue: 1 },
        hAxis: { gridlines: {count: 0}, ticks: [] },
        vAxes: { 0: {textStyle: {fontSize: 11, color: 'blue'} }, 1: {viewWindowMode: 'maximized', baselineColor: '#A0D0A0', textStyle: {fontSize: 11, color: 'limegreen'} } },
      };
%s
    }
  </script>
</head>
<body>
<div align='center'><h3>TechCrunch Feed Filter</h3></div>
This page shows what analysis is done to filter the noise away from the Techcrunch feed into
<a href="http://feeds.feedburner.com/TrendingAtTechcrunch"> a more concise feed <img src="feed.png" alt="feed" height="14" width="14"></a>.
<a href="https://david.dlma.com/blog/my-techcrunch-feed-filter">Learn more about the Feed Filter</a>.<br /><br />
"""
84) 
# Static tail of the generated page: closes a table and a div, then emits
# the credits, the source/raw-data/status links and the closing body/html
# tags. Its use lies outside the visible part of this file; presumably
# make_index_html() writes it after the item rows.
html_footer = """
</table>
</div><br />
<div align='center'>Thanks to <a href="http://www.feedparser.org/">The Universal Feed Parser module</a>,
<a href="http://pyyaml.org/">PyYAML</a> and <a href="http://code.google.com/apis/chart/">Google Charts</a>.<br />
<a href="http://git.dlma.com/techcrunch.git/">source</a> &bull; <a href="techcrunch.yaml">raw data</a> &bull; <a href="stats.txt">status</a><br />&copy; 2011 <a href="https://david.dlma.com">David Blume</a></div><br />
</body>
</html>
"""
94) 
# JavaScript fragments written by write_chart_data() for each chart.
# chart_data_header opens the DataTable literal and supplies its column
# header row; the data rows are written right after it.
chart_data_header = """      var data = google.visualization.arrayToDataTable([
        ['', 'Comments', 'Shares', {'type': 'string', 'role': 'style'}],
"""
# chart_data_middle closes the DataTable literal and creates the chart.
# Placeholders: %d is the chart index (the element id is "chart<index>"),
# %s is the chart background color as RRGGBB without the leading '#'.
chart_data_middle = """      ]);
      var chart = new google.visualization.LineChart(document.getElementById('chart%d'));
      options.backgroundColor = '#%s';
"""
102) 
# Chart size; the same numbers appear as width/height in html_head's chart
# options and as the "td div" height in its CSS.
img_width = 300
img_height = 68

# Colors are RRGGBB hex strings without the leading '#'.
# NOTE(review): the series, threshold, tag and watermark colors are not
# referenced in the visible part of this file - confirm they are still used.
series_1_color = "0000FF"
series_2_color = "00AA00"
threshold_color = "FF8C00"
tag_color = "F01000"

# Row backgrounds: substituted into html_head's tr.even / tr.odd rules and
# used by write_chart_data() as the chart background.
even_background = "F8F8F8"
odd_background = "E8E8E8"

even_watermark = "E0E0FF"
odd_watermark = "D0D0F0"


116) 
117) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def asciiize(s):
    """Return ``s`` encoded as ASCII when possible, otherwise ``s`` unchanged.

    s (in) Usually a text string. Anything that cannot be encoded - text
        with non-ASCII characters, a byte string holding non-ASCII bytes,
        or an object without an ``encode`` method (None, numbers, ...) -
        is handed back exactly as it was passed in.
    """
    try:
        return s.encode('ascii')
    except (UnicodeError, AttributeError):
        # UnicodeError covers both UnicodeEncodeError (non-ASCII text) and
        # UnicodeDecodeError (Python 2 implicitly decodes byte strings
        # before encoding them). AttributeError means ``s`` is not a string.
        return s
125) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

126) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

def send_email(subject, message, toaddrs,
        fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)):
    """Send a plain-text email through the server described by smtp_creds.

    subject  (in) Text of the Subject header.
    message  (in) Body of the email.
    toaddrs  (in) List of recipient addresses; also joined into the To header.
    fromaddr (in) Sender; defaults to this script's file name paired with
        smtp_creds.user (evaluated once, when the function is defined).

    Exceptions raised by smtplib while connecting, logging in or sending
    propagate to the caller.
    """
    # A blank line (CRLF CRLF) must separate the headers from the body,
    # otherwise the first line of the message is read as part of the headers.
    payload = "Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s" % \
              (fromaddr, ", ".join(toaddrs), subject, message)
    smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port)
    try:
        smtp.login(smtp_creds.user, smtp_creds.passw)
        smtp.sendmail(fromaddr, toaddrs, payload)
    finally:
        # Always release the connection, even when login or sending failed.
        try:
            smtp.quit()
        except smtplib.SMTPException:
            # The server already dropped us; do not mask the original error.
            smtp.close()
137) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

138) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def index_id(a_list, op, elem):
    """Return the index of the first item for which ``op(item, elem)`` is true.

    a_list (in) Iterable to search.
    op     (in) Two-argument predicate, e.g. operator.ge.
    elem   (in) Value passed as the second argument of ``op``.

    Returns -1 when no item matches (including when ``a_list`` is empty).
    Errors raised by ``op`` itself are no longer swallowed; they propagate.
    """
    matches = (index for index, item in enumerate(a_list) if op(item, elem))
    return next(matches, -1)
144) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

145) 
David Blume Google terminated image cha...

David Blume authored 5 years ago

def write_chart_data(time_posted, lhs_times, lhs_values, rhs_times,
                   rhs_values, threshold_value, image_index, tag_hit, chart_io):
    """Write the JavaScript that draws one item's chart to ``chart_io``.

    time_posted, lhs_times, rhs_times (in) Accepted for interface
        compatibility only. The chart's x axis is the sample index 0..7,
        so the timestamps are not used.
    lhs_values      (in) Samples for the 'Comments' column.
    rhs_values      (in) Samples for the 'Shares' column; also searched for
        the first sample that reaches ``threshold_value``.
    threshold_value (in) Qualifying threshold, or -1 when there is none.
    image_index     (in) Chart number; selects the "chart<N>" element and
        the background color.
    tag_hit         (in) True when the item matched a tag or author; the
        marker is then a red diamond placed on the first sample.
    chart_io        (in, out) File-like object that receives the script text.
    """
    # An item without samples still gets a single zero-valued point.
    if not lhs_values:
        lhs_values = [0]
    if not rhs_values:
        rhs_values = [0]

    met_threshold_pt = -1
    if threshold_value != -1:
        met_threshold_pt = index_id(rhs_values, operator.ge, threshold_value)
        if met_threshold_pt == -1 or tag_hit:
            # This can happen if threshold_value was set to a number
            # because the author or a tag was matched, but the article
            # was unpopular. We choose to put a marker at point index 0.
            met_threshold_pt = 0

    # Odd chart indexes get the "even" color and vice versa.
    # NOTE(review): presumably because index 0 is the page's first row -
    # confirm against the row classes written by make_index_html().
    if image_index % 2:
        bg_color = even_background
    else:
        bg_color = odd_background

    def cell(values, position):
        """Return the sample at ``position`` as text, or "null" past the end."""
        if position < len(values):
            return str(values[position])
        return "null"

    chart_io.write(chart_data_header)
    for i in range(8):
        if i != met_threshold_pt:
            style = "null"
        elif tag_hit:
            style = "'point { size: 5; fill-color: #FF0000; shape-type: diamond}'"
        else:
            style = "'point { size: 5; fill-color: #FF8C00; }'"
        chart_io.write("        [%d,  %s,        %s, %s],\n" %
                       (i, cell(lhs_values, i), cell(rhs_values, i), style))
    chart_io.write(chart_data_middle % (image_index, bg_color))
    if met_threshold_pt == -1 and not tag_hit:
        chart_io.write("      delete options.vAxes[1].baseline;\n")
    else:
        chart_io.write("      options.vAxes[1].baseline = %d;\n" % (threshold_value,))
    chart_io.write("      chart.draw(data, options);\n\n")
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

202) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

203) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def _report_feed_status(status):
    """Print a one-line explanation for an unexpected HTTP status."""
    messages = {
        503: "the feed is temporarily unavailable.",
        400: "the feed says we made a bad request.",
        502: "the feed reported a bad gateway error.",
        404: "the feed says the page was not found.",
        500: "the feed had an internal server error.",
        403: "Access to the feed was forbidden.",
    }
    if status in messages:
        print(messages[status])
    else:
        print("the feed returned feed.status %d." % (status,))


def _pickle_feed(feed):
    """Save the parsed feed to techcrunch_feed.pickle unless its XML was malformed."""
    bozo = getattr(feed, 'bozo_exception', None)
    if isinstance(bozo, xml.sax.SAXParseException):
        print("Didn't pickle TechCrunch feed because it had a bozo_exception: %s" % (str(bozo),))
        return
    try:
        with open(os.path.join(localdir, 'techcrunch_feed.pickle'), 'wb') as f:
            pickle.dump(feed, f)
    except (pickle.PicklingError, TypeError) as e:
        print("An error occurred while pickling the feed: %s." % (str(e),))
        traceback.print_exc(3, file=sys.stdout)


def _report_feed_failure(feed):
    """Print why the feed could not be retrieved (no HTTP status was set)."""
    if not hasattr(feed, 'bozo_exception'):
        print("the feed returned class %s, %s" % (str(feed.__class__), str(feed)))
        return

    e = feed.bozo_exception
    if isinstance(e, urllib2.URLError):
        if not hasattr(e, 'reason'):
            print("the feed had a URLError %s" % (str(e),))
            return
        # Linux errno values: ETIMEDOUT, ECONNREFUSED, ECONNRESET.
        # Reading .errno also works when the reason is a plain string or an
        # exception without arguments, where indexing it would fail.
        reasons = {
            110: "the feed's connection timed out.",
            111: "the feed's connection was refused.",
            104: "the feed reset the connection.",
        }
        code = getattr(e.reason, 'errno', None)
        if code in reasons:
            print(reasons[code])
        else:
            print("the feed had a URLError with reason %s." % (str(e.reason),))
    elif isinstance(e, httplib.BadStatusLine):
        print("the feed gave a bad status line. (%s)" % (str(e),))
    elif len(str(e)):
        print("the feed bozo_exception: %s \"%s\"" % (str(e.__class__), str(e)))
    else:
        print("the feed bozo_exception: %s %s" % (str(e.__class__), repr(e)))


def process_feed(yaml_items):
    """Retrieve the TechCrunch feed and fold its entries into yaml_items.

    yaml_items (in, out) List of item dicts, newest first. Each feed entry
        is passed to process_item(), the list is then cut down to its
        first 200 items, and every remaining item is passed to
        process_yaml_item().

    Nothing is returned; problems with the feed are reported on stdout.
    """
    feed = feedparser.parse('http://feeds.feedburner.com/TechCrunch')
    if not hasattr(feed, 'status'):
        # The request never produced an HTTP response.
        _report_feed_failure(feed)
        return
    if feed.status == 304:
        # Not modified: nothing to do.
        return

    if feed.status in (200, 301, 302, 307):
        # Save off this
        _pickle_feed(feed)
    else:
        _report_feed_status(feed.status)

    # As before, whatever entries were parsed are processed even when an
    # error status was just reported.
    for entry in reversed(feed.entries):
        process_item(entry, yaml_items)

    # If we have more than 200 items, remove the old ones.
    del yaml_items[200:]

    for item in yaml_items:
        process_yaml_item(item)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

283) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

284) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def process_item(feed_item, yaml_items):
    """Record one RSS entry in yaml_items, creating its YAML item if needed.

    feed_item  (in) A parsed feed entry (attribute access: link, title, ...).
    yaml_items (in, out) List of item dicts; a new item is inserted at the
        front when no existing item has the entry's link.

    Sets the module-level any_entry_added flag when an item or a
    slash_comments sample is added.
    """
    global any_entry_added
    now = int(time.time())

    # Use the entry's own timestamp, falling back to the current UTC time.
    if hasattr(feed_item, 'issued_parsed'):
        posted = feed_item.issued_parsed
    elif hasattr(feed_item, 'date_parsed'):
        posted = feed_item.date_parsed
    else:
        posted = time.gmtime()
        print("process_item found no timestamp for %s" % (asciiize(feed_item.link),))
    timecode_parsed = calendar.timegm(posted)

    # Prefer the original article link over the feedburner redirect.
    # TODO 2018-01-18: Leave in the ncid for URL clicks, but remove during processing.
    link = getattr(feed_item, 'feedburner_origlink', feed_item.link)

    # Look for an item that already tracks this link.
    yaml_item = next((known for known in yaml_items if link == known['link']), None)
    if yaml_item is None:
        author = asciiize(feed_item.author) if hasattr(feed_item, 'author') else ''

        # Make a new yaml_item
        yaml_item = {
            'title': asciiize(feed_item.title),
            'link': asciiize(link),
            'author': author,
            'tags': [],
            'orig_posted': timecode_parsed,
            'qualified': -1,
            'comment_times': [],
            'fb_comments': [],
            'fb_shares': [],
            'fb_likes': [],
            'slash_comment_times': [],
            'slash_comments': [],
        }
        if hasattr(feed_item, 'tags'):
            yaml_item['tags'].extend(asciiize(tag.term) for tag in feed_item.tags)

        yaml_items.insert(0, yaml_item)
        any_entry_added = True

    # Entries older than nine half-hour periods get no further samples.
    max_age = 60 * 30 * 9
    if now - timecode_parsed > max_age:
        return

    # Now, add the new values (at most 8 samples per item).
    if not hasattr(feed_item, 'slash_comments'):
        return
    if len(yaml_item['slash_comments']) >= 8:
        return
    any_entry_added = True
    yaml_item['slash_comment_times'].append(now)
    yaml_item['slash_comments'].append(int(feed_item.slash_comments))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

349) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

350) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def process_yaml_item(yaml_item):
    """Add one Facebook engagement sample to yaml_item if it has fewer than 8.

    yaml_item (in, out) Item dict; on a successful lookup a value is
        appended to each of 'comment_times', 'fb_shares', 'fb_comments'
        and 'fb_likes', and the module-level any_entry_added flag is set.
    """
    global any_entry_added

    # Related to TODO 2018-01-18: Remove ncid only during processing.
    # Maybe we should find() it instead, in case feedburner adds other options
    tracking_suffix = '?ncid=rss'
    lookup_url = yaml_item['link']
    if lookup_url.endswith(tracking_suffix):
        lookup_url = lookup_url[:-len(tracking_suffix)]

    sampled_at = int(time.time())
    if len(yaml_item['fb_comments']) >= 8:
        return

    shares, comments, likes = Get_fb_stats(lookup_url)
    if comments == -1:
        # The lookup failed; record nothing for this round.
        return

    any_entry_added = True
    yaml_item['comment_times'].append(sampled_at)
    yaml_item['fb_shares'].append(shares)
    yaml_item['fb_comments'].append(comments)
    yaml_item['fb_likes'].append(likes)

    # Reddit sampling is not enabled:
    #    if len(yaml_item['reddit_']) < 8:
    #        num_ = get_reddit_stats(link)
    #        if num_ != -1:
    #            any_entry_added = True
    #            yaml_item['reddit_times'].append(timecode_now)
    #            yaml_item['reddit_'].append(num_)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

377) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

378) 
David Blume Better conformance to PEP-8...

David Blume authored 6 years ago

def get_reddit_stats(url_string):
    """Placeholder for looking up Reddit statistics about url_string.

    Not implemented: the argument is ignored and -1 is always returned.
    Consider curl "https://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
    """
    return -1
383) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

384) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def Get_fb_stats(url_string):
    """Use graph's "engagement" field to get reactions and shares.

    url_string (in) Address of the article to look up.

    Returns a (shares, comments, likes) tuple. "likes" is the reaction
    count plus the share count, and "comments" adds the plugin comments
    to the regular comments; counts missing from the reply count as 0.
    When the request fails, or the reply is too short or is not the
    expected JSON, a message is printed and (-1, -1, -1) is returned.
    """
    failure = (-1, -1, -1)

    url_string = url_string.encode('utf-8')

    try:
        encoded = urllib.urlencode({'access_token': facebook_token})
        url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s'
        f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded))
        try:
            data = f.read()
        finally:
            # Close the response even when reading it fails.
            f.close()
    except (urllib2.URLError, httplib.BadStatusLine) as e:
        if hasattr(e, 'reason'):  # URLError
            if hasattr(e, 'code'):
                print("Get_fb_stats got an error (1): %s %s %s" % (e.code, e.reason, url_string))
            else:
                print("Get_fb_stats got an error (2): %s %s" % (e.reason, url_string))
        elif hasattr(e, 'code'):  # URLError
            print("Get_fb_stats got an error. Code: %s %s" % (e.code, url_string))
        else:
            print("Get_fb_stats got an error (3): %s" % (str(e),))
        return failure

    if len(data) <= 20:
        print("Get_fb_stats got too little data for  %s" % (url_string,))
        return failure

    try:
        d = json.loads(data)['engagement']
        shares = d.get('share_count', 0)
        # TODO 2018-01-18: og_object metric was likes + shares + comments
        # Here we'll combine likes and shares, and comments with plugin_comments
        likes = d.get('reaction_count', 0) + shares
        comments = d.get('comment_plugin_count', 0) + d.get('comment_count', 0)
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # Invalid JSON, or a reply (such as an error object) that lacks a
        # usable "engagement" mapping.
        print("Get_fb_stats got an unexpected response for %s: %s" % (url_string, str(e)))
        return failure
    return shares, comments, likes
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

433) 
434) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

def make_index_html(yaml_items, weekend_stats, weekday_stats):
    """Writes a static index.html file from the YAML items.

    The page is first written to 'index.html_new' in localdir and then moved
    over 'index.html', so a failure part-way through leaves the previous page
    in place.

    Relies on the module-level names localdir, authors_to_post, tags_to_post,
    rhs_metric, rhs_metric_times, html_head, html_footer, even_background,
    odd_background and write_chart_data().

    Args:
        yaml_items: List of item dicts. Only the first 40 are rendered. Each
            is read for 'author', 'tags', 'orig_posted', 'comment_times',
            'fb_comments', 'qualified', 'link', 'title' and the keys named by
            rhs_metric and rhs_metric_times.
        weekend_stats: Indexable stats; element [2] is read as
            (median, mean, std. dev) for the legend table.
        weekday_stats: Same layout as weekend_stats.
    """
    new_index_fullpath = os.path.join(localdir, 'index.html_new')
    index_fullpath = os.path.join(localdir, 'index.html')
    shown_items = yaml_items[:40]

    # Collect the per-item chart data first; it is substituted into html_head.
    chart_io = cStringIO.StringIO()
    try:
        for image_index, image in enumerate(shown_items):
            # Short-circuit: tags are only inspected when the author misses.
            tag_hit = (image['author'].lower() in authors_to_post or
                       bool({j.lower() for j in image['tags']} & tags_to_post))
            write_chart_data(image['orig_posted'],
                             image['comment_times'],
                             image['fb_comments'],
                             image[rhs_metric_times],
                             image[rhs_metric],
                             image['qualified'],
                             image_index,
                             tag_hit,
                             chart_io)
        chart_data = chart_io.getvalue()
    finally:
        # Release the buffer even if write_chart_data() raised.
        chart_io.close()

    with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
        f.write(html_head % (even_background, odd_background, chart_data))
        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
        for label, stats in (('Weekday', weekday_stats), ('Weekend', weekend_stats)):
            median, mean, sigma = stats[2][0], stats[2][1], stats[2][2]
            # The threshold column is mean plus one standard deviation.
            f.write('<tr><th>%s</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' %
                    (label, median, mean, sigma, mean + sigma))
        f.write('</table></div>\n<br />\n')
        f.write('<div align="center">\n<table>\n')
        for image_index, image in enumerate(shown_items):
            # Odd indexes get the "even" class (rows are counted from one).
            row_class = "even" if image_index % 2 else "odd"
            # Escape markup characters, as make_feed_file() does, so a title
            # or author containing '&' or '<' cannot break the page.
            link = cgi.escape(image['link'], True)
            title = cgi.escape(image['title']).encode('ascii', 'xmlcharrefreplace')
            author = cgi.escape(image['author']).encode('ascii', 'xmlcharrefreplace')
            f.write('<tr valign="center" class="%s">\n  <td><strong><a href="%s">%s</a></strong> <span class="author">by %s</span></td>\n' %
                    (row_class, link, title, author))
            # A 'qualified' value other than -1 earns the star image.
            star = '<img src="star_30.png" width="30" height="29" />' if image['qualified'] != -1 else ''
            # Close the cell with </td>; the previous '<td>' opened a stray
            # empty cell in every row.
            f.write('  <td>%s</td>\n' % (star,))
            f.write('  <td><div id="chart%d" /></td></tr>\n' % (image_index,))
        f.write(html_footer)

    # Remove the old page first so the move also works where an existing
    # destination would make it fail.
    if os.path.exists(index_fullpath):
        os.unlink(index_fullpath)
    shutil.move(new_index_fullpath, index_fullpath)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

482) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

483) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

def make_feed_file(yaml_items):
    """Writes the RSS feed file with the YAML items.

    Writes 'rss_feed.xml' into the module-level localdir. Only items whose
    'qualified' value is not -1 are included, in the order given, and at most
    15 of them.

    Args:
        yaml_items: List of item dicts; each is read for 'qualified',
            'orig_posted' (seconds since the epoch), 'title', 'author' and
            'link'.
    """
    max_feed_items = 15
    rfc822_format = "%a, %d %b %Y %H:%M:%S +0000"

    with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f:
        f.write('<?xml version="1.0" encoding="utf-8"?>\n<rss version="2.0">\n<channel>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>')
        f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" %
                (time.strftime(rfc822_format, time.gmtime()),))
        count = 0
        for item in yaml_items:
            if item['qualified'] == -1:
                continue
            # The item's publication date is its original posting time, in UTC.
            pub_date = time.strftime(rfc822_format, time.gmtime(item['orig_posted']))
            escaped_title = cgi.escape(item['title']).encode('ascii', 'xmlcharrefreplace')
            escaped_author = cgi.escape(item['author']).encode('ascii', 'xmlcharrefreplace')
            # A raw '&' in the URL would make the feed malformed XML, so the
            # link is escaped just like the title.
            escaped_link = cgi.escape(item['link'])
            f.write('<item><title>%s</title><pubDate>%s</pubDate><link>%s</link><guid isPermaLink="false">%s</guid><description><![CDATA[By: %s]]></description></item>\n' %
                    (escaped_title, pub_date, escaped_link, escaped_link, escaped_author))
            count += 1
            if count >= max_feed_items:
                break
        f.write("</channel></rss>")
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

501) 
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

502) 
503) if __name__=='__main__':
504)     start_time = time.time()
505)     progress_text = []
506) 
507)     old_stdout = sys.stdout
508)     old_stderr = sys.stderr
David Blume Little cleanup, div to rese...

David Blume authored 5 years ago

509)     sys.stdout = sys.stderr = cStringIO.StringIO()
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

510) 
511)     try:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

512)         localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

513)         #
514)         # Read in techcrunch.yaml
515)         #
516)         # [ { 'title'               : 'Title Text',
517)         #     'link'                : u'http://techcrunch.com/2010/08/17/google-buzz-who-to-follow/',
518)         #     'author'              : u'MG Siegler',
519)         #     'orig_posted'         : 1282197199
520)         #     'tags'                : [ u'Google', u'privacy' ]
521)         #     'qualified'           : -1
522)         #     'comment_times'       : [ 1282197199, 1282197407 ]
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

523)         #     'fb_comments'         : [ 0, 5 ]
524)         #     'fb_shares'           : [ 0, 300 ]
525)         #     'fb_likes'            : [ 0, 19 ]
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

526)         #     'slash_comment_times' : [ 1282197199, 1282197407 ]
527)         #     'slash_comments'      : [ 0, 5 ]
528)         #    },
529)         #    { ... }
530)         #  ]
531)         #
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

532)         yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
533)         if os.path.exists(yaml_fullpath):
534)             with open(yaml_fullpath, 'rb') as f:
535)                 items = yaml.load(f)
David Blume Catch up to production agai...

David Blume authored 6 years ago

536)                 if items is None:
537)                     print yaml_fullpath, "exists, but was empty."
538)                     items = []
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

539) 
540)                 # Do any dictionary item updating that might be necessary
541) #                for item in items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

542) #                    if not item.has_key('fb_shares'):
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

543) #                        item['fb_shares'] = []
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

544)         else:
545)             print "could not open", yaml_fullpath
546)             items = []
547) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

548)         with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f:
David Blume Catch up to production agai...

David Blume authored 6 years ago

549)             facebook_token = f.read()
550) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

551)         progress_text = ["read techcrunch.yaml"]
552)         process_feed(items)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

553) 
554)         #
555)         # If any work was done, then write files.
556)         #
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

557)         if any_entry_added:
David Blume Better conformance to PEP-8...

David Blume authored 6 years ago

558)             weekend_stats, weekday_stats = analysis.process_feed(items, rhs_metric, rhs_metric_times)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

559) 
560)             # We'll only look at the stats for the time 1:00 to 1:30 after posting.
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

561)             weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
562)             weekend_threshold = weekend_mean + weekend_sigma
563)             weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
564)             weekday_threshold = weekday_mean + weekday_sigma
565)             for item in items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

566)                 wday = time.localtime(item['orig_posted']).tm_wday
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

567)                 if wday == 5 or wday == 6:
568)                     threshold = weekend_threshold
569)                 else:
570)                     threshold = weekday_threshold
571)                 if item['qualified'] == -1:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

572)                     for i in range(len(item[rhs_metric_times])):
573)                         r_time = item[rhs_metric_times][i]
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

574)                         if r_time - item['orig_posted'] < 5400:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

575)                             if item[rhs_metric][i] >= threshold:
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

576)                                 item['qualified'] = threshold
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

577)                             if r_time - item['orig_posted'] >= 3600:
578)                                 break
579) 
580)             # Automatically add those items whose authors and tags I like
581)             for item in items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

582)                 if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0:
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

583)                     if item['author'].lower() in authors_to_post:
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

584)                         item['qualified'] = threshold
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

585)                     elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0:
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

586)                         item['qualified'] = threshold
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

587) 
588)             #
589)             # Write out the updated yaml file.
590)             #
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

591) 
592)             # For the one file we really use, write to a file on the side, then move it.
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

593)             yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
594)             with open(yaml_newfile_fullpath, 'wb') as f:
595)                 yaml.dump(items, f, width=120)
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

596)             try:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

597)                 os.rename(yaml_newfile_fullpath, yaml_fullpath)
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

598)             except OSError as e:
599)                 print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

600)             with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
601)                 yaml.dump(items, f, width=120)
602)             with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
603)                 yaml.dump(items, f, encoding='utf-8', width=120)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

604) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

605)             make_feed_file(items)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

606) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

607)             make_index_html(items, weekend_stats, weekday_stats)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

608)         else:
609)             print "No entries were added this time."
610) 
David Blume Catch up to production agai...

David Blume authored 6 years ago

611)     except Exception as e:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

612)         exceptional_text = "An exception occurred: " + str(e.__class__) + " " + str(e)
613)         print exceptional_text, ' '.join(progress_text)
614)         traceback.print_exc(file=sys.stdout)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

615)         try:
David Blume Replace hardcoded string wi...

David Blume authored 6 years ago

616)             send_email('Exception thrown in ' + os.path.basename(__file__),
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

617)                       exceptional_text + "\n" + traceback.format_exc(),
David Blume Replace hardcoded string wi...

David Blume authored 6 years ago

618)                       (smtp_creds.default_recipient,))
David Blume Catch up to production agai...

David Blume authored 6 years ago

619)         except Exception as e:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

620)             print "Could not send email to notify you of the exception. :("
621) 
622)     message = sys.stdout.getvalue()
623)     sys.stdout = old_stdout
624)     sys.stderr = old_stderr
625)     if not debug:
626)         print message
627) 
628)     # Finally, let's save this to a statistics page
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

629)     if os.path.exists(os.path.join(localdir, 'stats.txt')):
630)         with open(os.path.join(localdir, 'stats.txt')) as f:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

631)             lines = f.readlines()
632)     else:
633)         lines = []
David Blume Catch up to production agai...

David Blume authored 6 years ago

634)     lines = lines[:672] # Just keep the past week's worth
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

635)     # status = len(message.strip()) and message.strip().replace('\n', ' - ') or "OK"
636)     status = len(message.strip()) and '\n                       '.join( message.splitlines()) or "OK"
637)     lines.insert(0, "%s %3.0fs %s\n" % (time.strftime('%Y-%m-%d, %H:%M', time.localtime()), time.time() - start_time, status))
638)     with open(os.path.join(localdir,'stats.txt' ), 'w') as f:
639)         f.writelines(lines)