master
David Blume Catch up to production agai...

David Blume authored 6 years ago

1) #!/usr/bin/env python
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

2) #
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

3) # Testing without affecting the yaml file and saving the updated one aside:
4) # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
5) # cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

6) import feedparser
7) import yaml
8) import sys
9) import os
10) import time
11) import codecs
12) import traceback
13) import calendar
14) import pickle
15) import exceptions
16) import urllib
17) import urllib2
18) import httplib
19) import shutil
20) import smtplib
21) import analysis
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

22) import json
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

23) import xml
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

24) import operator
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

25) import cgi
David Blume Google terminated image cha...

David Blume authored 5 years ago

26) import cStringIO
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

27) import smtp_creds  # Your own credentials, used in send_email()
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

28) 
# Debug switch; nothing in this part of the file reads it.
debug = True
# Set to True by process_item() and process_yaml_item() whenever they record
# a new item or a new sample.
any_entry_added = False
# Lower-case tag names. make_index_html() marks an item as a "tag hit" when
# its lower-cased tags intersect this set.
tags_to_post = {'apple', 'google', 'roku'}
# Lower-case author names. make_index_html() marks an item as a "tag hit"
# when its lower-cased author is listed here.
authors_to_post = ['michael arrington',]

# TODO 2018-01-18: Maybe combine fb_likes with fb_shares or something...
# Keys of a YAML item that supply the values and the sample times of the
# right-hand chart series (see the write_chart_data() call in make_index_html()).
rhs_metric = 'fb_likes'
rhs_metric_times = 'comment_times'

# Directory joined in front of the names of the files this script writes
# (e.g. techcrunch_feed.pickle, index.html).
localdir = ''
39) 
David Blume Google terminated image cha...

David Blume authored 5 years ago

# Opening part of the generated index.html, used as a %-format template.
# make_index_html() fills the four placeholders, in order, with:
#   %s  background color of even table rows (hex RGB, no '#')
#   %s  background color of odd table rows (hex RGB, no '#')
#   %d  chart width in pixels
#   %s  JavaScript produced by write_chart_data(), placed inside drawChart()
# NOTE(review): "border-collapse:separated" below is not a valid CSS value
# ("separate" is); it is left as-is because correcting it would change how
# the legend table currently renders.
html_head = """<!DOCTYPE html>
<html><head>
  <title>TechCrunch Feed Filter</title>
  <!-- <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://techcrunch.dlma.com/rss_feed.xml" /> -->
  <link rel="alternate" type="application/rss+xml" title="RSS feed" href="http://feeds.feedburner.com/TrendingAtTechcrunch" />
  <style type="text/css">
    body { font-family: "Arial", sans-serif; }
    .author { font-size: smaller; color:gray; }
    .date { font-size: smaller; color:gray; }
    .h3 { font-size: larger; }
    a { text-decoration: none; }
    /* table { border: none; border-collapse:collapse; font-size: large } */
    table { border-collapse: collapse; }
    table.legend { border:1px solid LightSlateGray; font-size: medium; border-collapse:separated; }
    table.legend th { border: 1px solid LightSlateGray; background-color: #E0E0E0; }
    table.legend td { border: 1px solid LightSlateGray; }
    tr.even { background:#%s; padding: 2em; }
    tr.odd { background:#%s; padding-bottom: 2em; }
    td div { height: 68px; }
  </style>
  <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
  <script type="text/javascript">
    google.charts.load('current', {'packages':['corechart']});
    google.charts.setOnLoadCallback(drawChart);
    function drawChart() {
      var options = {
        width:%d,
        height:68,
        pointSize:0.1,
        dataOpacity:1.0,
        series: { 0: {targetAxisIndex:0}, 1: {targetAxisIndex:1, color:'limegreen'} },
        vAxis: { gridlines: {count: 0}, maxValue: 1 },
        hAxis: { gridlines: {count: 0}, ticks: [] },
        vAxes: { 0: {textStyle: {fontSize: 11, color: 'blue'} }, 1: {viewWindowMode: 'maximized', baselineColor: '#A0D0A0', textStyle: {fontSize: 11, color: 'limegreen'} } },
      };
%s
    }
  </script>
</head>
<body>
<div align='center'><h3>TechCrunch Feed Filter</h3></div>
This page shows what analysis is done to filter the noise away from the Techcrunch feed into
<a href="http://feeds.feedburner.com/TrendingAtTechcrunch"> a more concise feed <img src="feed.png" alt="feed" height="14" width="14"></a>.
<a href="https://david.dlma.com/blog/my-techcrunch-feed-filter">Learn more about the Feed Filter</a>.<br /><br />
"""
85) 
# Closing part of the generated page: ends a table and a div, then prints the
# credits and the links to the source, raw data and status files.
html_footer = """
</table>
</div><br />
<div align='center'>Thanks to <a href="http://www.feedparser.org/">The Universal Feed Parser module</a>,
<a href="http://pyyaml.org/">PyYAML</a> and <a href="http://code.google.com/apis/chart/">Google Charts</a>.<br />
<a href="http://git.dlma.com/techcrunch.git/">source</a> &bull; <a href="techcrunch.yaml">raw data</a> &bull; <a href="stats.txt">status</a><br />&copy; 2011 <a href="https://david.dlma.com">David Blume</a></div><br />
</body>
</html>
"""

# JavaScript written by write_chart_data() before the data rows of one chart.
# Each row that follows has four cells: index, comments, shares, point style.
chart_data_header = """      var data = google.visualization.arrayToDataTable([
        ['', 'Comments', 'Shares', {'type': 'string', 'role': 'style'}],
"""
# JavaScript written by write_chart_data() after the data rows.  Placeholders:
#   %d  chart index, selecting the page element with id "chart<index>"
#   %s  chart background color (hex RGB, no '#')
chart_data_middle = """      ]);
      var chart = new google.visualization.LineChart(document.getElementById('chart%d'));
      options.backgroundColor = '#%s';
"""
103) 
David Blume Fix comment, and update a URL.

David Blume authored 4 years ago

# Chart width in pixels; make_index_html() substitutes it into html_head.
img_width = 240
# NOTE(review): html_head hard-codes a height of 68 as well; no code in this
# part of the file reads img_height.
img_height = 68

# Colors below are hex RGB strings without the leading '#'.
series_1_color = "0000FF"
series_2_color = "00AA00"
threshold_color = "FF8C00"
tag_color = "F01000"

# Row backgrounds: substituted into the tr.even / tr.odd rules of html_head
# and used by write_chart_data() as the chart background.
even_background = "F8F8F8"
odd_background = "E8E8E8"

even_watermark = "E0E0FF"
odd_watermark = "D0D0F0"
117) 
118) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def asciiize(s):
    """Return `s` encoded as ASCII when possible, otherwise `s` unchanged.

    s -- the value to convert, normally a text string.

    `s` is handed back untouched when it cannot be represented in ASCII
    (any UnicodeError, which covers both the encode failure of a text
    string and the implicit-decode failure Python 2 raises for a non-ASCII
    byte string) or when it has no encode() method at all, e.g. None or a
    number (AttributeError).
    """
    try:
        return s.encode('ascii')
    except (UnicodeError, AttributeError):
        # Builtin exception names are used directly; the Python-2-only
        # `exceptions` module is not needed to spell AttributeError.
        return s
126) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

127) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

def send_email(subject, message, toaddrs,
        fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)):
    """Send a plain-text email through the server described by smtp_creds.

    subject  -- text of the Subject header
    message  -- body of the email
    toaddrs  -- list of recipient addresses
    fromaddr -- sender; defaults to this script's file name paired with the
                smtp_creds user (computed once, when the function is defined)

    Exceptions raised by smtplib while connecting, logging in or sending
    propagate to the caller.
    """
    # The header section must be followed by an empty line; without it the
    # first line of the body can be mistaken for another header field.
    payload = ("Content-Type: text/plain; charset=\"us-ascii\"\r\n"
               "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s" %
               (fromaddr, ", ".join(toaddrs), subject, message))

    smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port)
    try:
        smtp.login(smtp_creds.user, smtp_creds.passw)
        smtp.sendmail(fromaddr, toaddrs, payload)
    finally:
        # Always release the connection, even when login or sending failed.
        try:
            smtp.quit()
        except smtplib.SMTPServerDisconnected:
            # The server already dropped the connection; nothing to close,
            # and the original error (if any) should not be masked.
            pass
138) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

139) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def index_id(a_list, op, elem):
    """Return the index of the first item for which op(item, elem) is true.

    a_list -- the sequence to search
    op     -- two-argument predicate, e.g. operator.ge
    elem   -- the value passed as the second argument of `op`

    Returns -1 when no item satisfies the predicate (including when
    `a_list` is empty).  Errors raised by `op` itself are not hidden.
    """
    matches = (index for index, item in enumerate(a_list) if op(item, elem))
    # The default makes "no match" explicit instead of relying on a
    # catch-all handler around StopIteration.
    return next(matches, -1)
145) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

146) 
David Blume Google terminated image cha...

David Blume authored 5 years ago

def write_chart_data(time_posted, lhs_times, lhs_values, rhs_times,
                     rhs_values, threshold_value, image_index, tag_hit, chart_io):
    """Append the JavaScript that draws one item's line chart to chart_io.

    time_posted     -- accepted for interface compatibility; not used
    lhs_times       -- accepted for interface compatibility; not used
    lhs_values      -- samples of the left-axis series (may be empty)
    rhs_times       -- accepted for interface compatibility; not used
    rhs_values      -- samples of the right-axis series (may be empty)
    threshold_value -- right-axis baseline value, or -1 when there is none
    image_index     -- chart number; selects the "chart<N>" page element and
                       the row background color
    tag_hit         -- True when the item matched a watched tag or author
    chart_io        -- file-like object the JavaScript is written to

    The chart's x axis is simply the sample index 0..7, so the sample times
    play no part in the output.
    """
    # An item without samples yet is drawn as a single zero point.
    if not len(lhs_values):
        lhs_values = [0,]
    if not len(rhs_values):
        rhs_values = [0,]

    met_threshold_pt = -1
    if threshold_value != -1:
        met_threshold_pt = index_id(rhs_values, operator.ge, threshold_value)
        if met_threshold_pt == -1 or tag_hit:
            # This can happen if threshold_value was set to a number
            # because the author or a tag was matched, but the article
            # was unpopular. We choose to put a marker at point index 0.
            met_threshold_pt = 0

    if image_index % 2:
        bg_color = even_background
    else:
        bg_color = odd_background

    def cell(values, position):
        """Return the sample at `position` as text, or "null" past the end."""
        if position < len(values):
            return str(values[position])
        return "null"

    chart_io.write(chart_data_header)
    for i in range(8):
        if i != met_threshold_pt:
            style = "null"
        elif tag_hit:
            style = "'point { size: 5; fill-color: #FF0000; shape-type: diamond}'"
        else:
            style = "'point { size: 5; fill-color: #FF8C00; }'"
        chart_io.write("        [%d,  %s,        %s, %s],\n" %
                       (i, cell(lhs_values, i), cell(rhs_values, i), style))
    chart_io.write(chart_data_middle % (image_index, bg_color))
    if met_threshold_pt == -1 and not tag_hit:
        chart_io.write("      delete options.vAxes[1].baseline;\n")
    else:
        chart_io.write("      options.vAxes[1].baseline = %d;\n" % (threshold_value,))
    chart_io.write("      chart.draw(data, options);\n\n")
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

203) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

204) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

205) def process_feed(yaml_items):
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

206)     """Retrieve the url and process it.
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

207)     feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
208)     """
dblume New feed URL, added comment...

dblume authored 2 years ago

209)     feed = feedparser.parse('https://techcrunch.com/feed/')
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

210)     if hasattr(feed, 'status'):
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

211)         if feed.status == 304:
212)             pass
213)         else:
214)             if feed.status != 200 and feed.status != 307 and feed.status != 301 and feed.status != 302:
215)                 if feed.status == 503:
216)                     print "the feed is temporarily unavailable."
217)                 elif feed.status == 400:
218)                     print "the feed says we made a bad request."
219)                 elif feed.status == 502:
220)                     print "the feed reported a bad gateway error."
221)                 elif feed.status == 404:
222)                     print "the feed says the page was not found."
223)                 elif feed.status == 500:
224)                     print "the feed had an internal server error."
225)                 elif feed.status == 403:
226)                     print "Access to the feed was forbidden."
227)                 else:
228)                     print "the feed returned feed.status %d." % ( feed.status, )
229)             else:
230)                 # Save off this
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

231)                 if hasattr(feed, 'bozo_exception') and isinstance(feed.bozo_exception, xml.sax._exceptions.SAXParseException):
232)                     print "Didn't pickle TechCrunch feed because it had a bozo_exception: %s" % (str(feed.bozo_exception))
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

233)                 else:
234)                     try:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

235)                         with open(os.path.join(localdir, 'techcrunch_feed.pickle'), 'wb') as f:
236)                             pickle.dump(feed, f)
David Blume Catch up to production agai...

David Blume authored 6 years ago

237)                     except(pickle.PicklingError, exceptions.TypeError) as e:
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

238)                         print "An error occurred while pickling the feed: %s." % \
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

239)                               (# str(e.__class__),
240)                                str(e))
241)                         traceback.print_exc(3, file=sys.stdout)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

242) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

243)             for i in reversed(feed.entries):
244)                 process_item(i, yaml_items)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

245) 
246)             # If we have more than 200 items, remove the old ones.
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

247)             while len(yaml_items) > 200:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

248)                 yaml_items.pop()
249) 
250)             for i in yaml_items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

251)                 # i['title'] = asciiize(i['title'])
252)                 # i['tags'] = map(asciiize, i['tags'])
253)                 process_yaml_item(i)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

254) 
255)     else:
256)         if hasattr(feed, 'bozo_exception'):
257)             e = feed.bozo_exception
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

258)             if isinstance(e, urllib2.URLError):
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

259)                 print_last_line = True
260)                 if hasattr(e, 'reason'):
261)                     if e.reason[0] == 110:
262)                         print "the feed's connection timed out."
263)                         print_last_line = False
264)                     elif e.reason[0] == 111:
265)                         print "the feed's connection was refused."
266)                         print_last_line = False
267)                     elif e.reason[0] == 104:
268)                         print "the feed reset the connection."
269)                         print_last_line = False
270)                     else:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

271)                         print "the feed had a URLError with reason %s." % (str(e.reason),)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

272)                         print_last_line = False
273)                 if print_last_line:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

274)                     print "the feed had a URLError %s" % (str(e),)
275)             elif isinstance(e, httplib.BadStatusLine):
276)                 print "the feed gave a bad status line. (%s)" % (str(e),)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

277)             else:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

278)                 if len(str(e)):
279)                     print "the feed bozo_exception: %s \"%s\"" % (str(e.__class__), str(e))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

280)                 else:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

281)                     print "the feed bozo_exception: %s %s" % (str(e.__class__), repr(e))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

282)         else:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

283)             print "the feed returned class %s, %s" % (str(feed.__class__), str(feed))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

284) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

285) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

286) def process_item(feed_item, yaml_items):
David Blume Catch up to production agai...

David Blume authored 6 years ago

287)     """Processes an RSS feed item, and converts it to a YAML item"""
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

288)     # Get the time
289)     global any_entry_added
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

290)     timecode_now = int(time.time())
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

291)     date_parsed = time.gmtime()
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

292)     if hasattr(feed_item, 'issued_parsed'):
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

293)         date_parsed = feed_item.issued_parsed
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

294)     elif hasattr(feed_item, 'date_parsed'):
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

295)         date_parsed = feed_item.date_parsed
296)     else:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

297)         print "process_item found no timestamp for", asciiize(feed_item.link)
298)     timecode_parsed = calendar.timegm(date_parsed)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

299) 
David Blume Catch up to production agai...

David Blume authored 6 years ago

300)     link = feed_item.link
301)     if hasattr(feed_item, 'feedburner_origlink'):
302)         link = feed_item.feedburner_origlink
303) 
304)     # TODO 2018-01-18: Leave in the ncid for URL clicks, but remove during processing.
305) #    suffix_to_remove = '?ncid=rss'
306) #    if link.endswith(suffix_to_remove):
307) #        link = link[:-len(suffix_to_remove)]
308) 
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

309)     # Look for i.feedburner_origlink in yaml_items
310)     yaml_item = None
311)     for i in yaml_items:
David Blume Catch up to production agai...

David Blume authored 6 years ago

312)         if link == i['link']:
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

313)             yaml_item = i
314)             break
David Blume Catch up to production agai...

David Blume authored 6 years ago

315)     if yaml_item is None:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

316)         author = ''
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

317)         if hasattr(feed_item, 'author'):
318)             author = asciiize(feed_item.author)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

319) 
David Blume Catch up to production agai...

David Blume authored 6 years ago

320)         # Make a new yaml_item
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

321)         yaml_item = {'title'               : asciiize(feed_item.title),
322)                      'link'                : asciiize(link),
323)                      'author'              : author,
324)                      'tags'                : [],
325)                      'orig_posted'         : timecode_parsed,
326)                      'qualified'           : -1,
327)                      'comment_times'       : [],
328)                      'fb_comments'         : [],
329)                      'fb_shares'           : [],
330)                      'fb_likes'            : [],
331)                      'slash_comment_times' : [],
332)                      'slash_comments'      : []
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

333)                     }
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

334)         if hasattr(feed_item, 'tags'):
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

335)             for i in feed_item.tags:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

336)                 yaml_item['tags'].append(asciiize(i.term))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

337) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

338)         yaml_items.insert(0, yaml_item)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

339)         any_entry_added = True
340) 
341)     # Maybe check to ensure that this item isn't too old.
342)     if timecode_parsed < timecode_now - 60 * 30 * 9:
343)         return
344) 
345)     # Now, add the new values
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

346)     if hasattr(feed_item, 'slash_comments') and len(yaml_item['slash_comments']) < 8:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

347)         any_entry_added = True
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

348)         yaml_item['slash_comment_times'].append(timecode_now)
349)         yaml_item['slash_comments'].append(int(feed_item.slash_comments))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

350) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

351) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

def process_yaml_item(yaml_item):
    """Append a fresh Facebook sample to yaml_item unless it already has 8.

    yaml_item (in, out) -- a per-article dict.  When Get_fb_stats() returns
        usable numbers, the current time and the share, comment and like
        counts are appended to the item's sample lists and the module-level
        any_entry_added flag is set.
    """
    global any_entry_added

    # Related to TODO 2018-01-18: Remove ncid only during processing.
    # Maybe we should find() it instead, in case feedburner adds other options
    tracking_suffix = '?ncid=rss'
    link = yaml_item['link']
    if link.endswith(tracking_suffix):
        link = link[:len(link) - len(tracking_suffix)]

    sampled_at = int(time.time())
    if len(yaml_item['fb_comments']) >= 8:
        return

    num_shares, num_comments, num_likes = Get_fb_stats(link)
    if num_comments == -1:
        # Get_fb_stats() could not obtain data; record nothing this round.
        return

    any_entry_added = True
    yaml_item['comment_times'].append(sampled_at)
    yaml_item['fb_shares'].append(num_shares)
    yaml_item['fb_comments'].append(num_comments)
    yaml_item['fb_likes'].append(num_likes)

    # A matching reddit sample (via get_reddit_stats) is not collected yet.
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

378) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

379) 
David Blume Better conformance to PEP-8...

David Blume authored 6 years ago

def get_reddit_stats(url_string):
    """Placeholder for a reddit lookup of url_string; always returns -1.

    A possible data source to consider:
    curl "https://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
    """
    return -1
384) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

385) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

386) def Get_fb_stats(url_string):
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

387)     """Use graph's "engagement" field to get reactions and shares."""
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

388)     shares = -1
389)     comments = -1
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

390)     likes = -1
391) 
David Blume Catch up to production agai...

David Blume authored 6 years ago

392)     url_string = url_string.encode('utf-8')
393) 
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

394)     try:
David Blume Catch up to production agai...

David Blume authored 6 years ago

395)         encoded = urllib.urlencode({'access_token': facebook_token})
396)         url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s'
397)         f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded))
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

398)         data = f.read()
399)         f.close()
David Blume Catch up to production agai...

David Blume authored 6 years ago

400)     except (urllib2.URLError, httplib.BadStatusLine) as e:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

401)         if hasattr(e, 'reason'): # URLError
David Blume Catch up to production agai...

David Blume authored 6 years ago

402)             if hasattr(e, 'code'):
403)                 print "Get_fb_stats got an error (1):", e.code, e.reason, url_string
404)             else:
405)                 print "Get_fb_stats got an error (2):", e.reason, url_string
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

406)         elif hasattr(e, 'code'): #URLError
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

407)             print "Get_fb_stats got an error. Code:", e.code, url_string
408)         else:
David Blume Catch up to production agai...

David Blume authored 6 years ago

409)             print "Get_fb_stats got an error (3):", str(e)
410)         return shares, comments, likes
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

411)     if len(data) > 20:
David Blume Catch up to production agai...

David Blume authored 6 years ago

412)         d = json.loads(data)['engagement']
413)         try:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

414)             shares = d['share_count']
David Blume Catch up to production agai...

David Blume authored 6 years ago

415)         except KeyError:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

416)             shares = 0
David Blume Catch up to production agai...

David Blume authored 6 years ago

417) 
418)         try:
419)             likes = d['reaction_count']
420)         except KeyError:
421)             likes = 0
422) 
423)         # TODO 2018-01-18: og_object metric was likes + shares + comments
424)         # Here we'll combine likes and shares, and comments with plugin_comments
425)         likes += shares
426) 
427)         try:
428)             comments = d['comment_plugin_count'] + d['comment_count']
429)         except KeyError:
430)             comments = 0
David Blume 2013-08-04: Miscellaneous c...

David Blume authored 6 years ago

431)     else:
432)         print "Get_fb_stats got too little data for ",  url_string
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

433)     return shares, comments, likes
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

434) 
435) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

def make_index_html(yaml_items, weekend_stats, weekday_stats):
    """Writes a static index.html file from the YAML items.

    The page is written to index.html_new in localdir first and then moved
    over index.html, so a failure while writing leaves the old page alone.

    Args:
        yaml_items: List of item dicts. Only the first 40 are rendered, in
            the order given. Each item supplies 'title', 'link', 'author',
            'orig_posted', 'tags', 'qualified', 'comment_times',
            'fb_comments' and the two series named by the module-level
            rhs_metric and rhs_metric_times.
        weekend_stats: Indexable stats for weekend posts; entry [2] is read
            as (median, mean, std. dev).
        weekday_stats: Same layout as weekend_stats, for weekday posts.
    """
    new_index_fullpath = os.path.join(localdir, 'index.html_new')
    index_fullpath = os.path.join(localdir, 'index.html')
    shown_items = yaml_items[:40]

    # Collect the chart data for every shown item into one string that is
    # substituted into the page head below.
    chart_io = cStringIO.StringIO()
    try:
        for image_index, image in enumerate(shown_items):
            # Flag items by a favorite author or carrying a favorite tag.
            tag_hit = (image['author'].lower() in authors_to_post or
                       bool(set(j.lower() for j in image['tags']) & tags_to_post))
            write_chart_data(image['orig_posted'],
                             image['comment_times'],
                             image['fb_comments'],
                             image[rhs_metric_times],
                             image[rhs_metric],
                             image['qualified'],
                             image_index,
                             tag_hit,
                             chart_io
                            )
        chart_data = chart_io.getvalue()
    finally:
        chart_io.close()

    with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
        f.write(html_head % (even_background, odd_background, img_width, chart_data))

        # Legend table: the threshold column is mean plus one std. dev.
        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
        for label, stats in (('Weekday', weekday_stats), ('Weekend', weekend_stats)):
            median, mean, sigma = stats[2][0], stats[2][1], stats[2][2]
            f.write('<tr><th>%s</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' %
                    (label, median, mean, sigma, mean + sigma))
        f.write('</table></div>\n<br />\n')

        f.write('<div align="center">\n<table>\n')
        for image_index, image in enumerate(shown_items):
            row_class = "even" if image_index % 2 else "odd"
            posted_at = time.strftime("%H:%M", time.localtime(image['orig_posted']))
            # Feed text is escaped before it is placed in markup, the same way
            # make_feed_file() treats title and author; the link is also
            # quote-escaped because it lands inside an attribute value.
            f.write('<tr valign="center" class="%s">\n  <td><strong><a href="%s">%s</a></strong> <span class="date">at %s</span> <span class="author">by %s</span></td>\n' %
                    (row_class,
                     cgi.escape(image['link'], True),
                     cgi.escape(image['title']).encode('ascii', 'xmlcharrefreplace'),
                     posted_at,
                     cgi.escape(image['author']).encode('ascii', 'xmlcharrefreplace'),
                    )
                   )
            # A star marks items that have qualified (qualified != -1).
            star = '<img src="star_30.png" width="30" height="29" />' if image['qualified'] != -1 else ''
            f.write('  <td>%s</td>\n' % (star, ))
            # An explicit close tag: HTML does not honor a self-closed <div />.
            f.write('  <td><div id="chart%d"></div></td></tr>\n' % (image_index, ))
        f.write(html_footer)

    # Replace the published page with the freshly written one.
    if os.path.exists(index_fullpath):
        os.unlink(index_fullpath)
    shutil.move(new_index_fullpath, index_fullpath)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

484) 
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

485) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

def make_feed_file(yaml_items):
    """Writes the RSS feed file with the YAML items.

    Only items whose 'qualified' value is not -1 are written, in the order
    given, and at most 15 of them. The output is rss_feed.xml in localdir.

    Args:
        yaml_items: List of item dicts supplying 'title', 'link', 'author',
            'orig_posted' and 'qualified'.
    """
    max_feed_items = 15
    date_format = "%a, %d %b %Y %H:%M:%S +0000"
    with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'wb', 'utf-8') as f:
        f.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<rss version=\"2.0\" xmlns:atom=\"http://www.w3.org/2005/Atom\">\n<channel>\n<atom:link href=\"http://techcrunch.dlma.com/rss_feed.xml\" rel=\"self\" type=\"application/rss+xml\"/>\n<title>Trending at TechCrunch</title><link>http://techcrunch.dlma.com</link>")
        f.write("<pubDate>%s</pubDate><description>Automatically Generated Feed</description><language>en-us</language>\n" % (time.strftime(date_format, time.gmtime())))
        count = 0
        for item in yaml_items:
            if item['qualified'] == -1:
                continue
            pub_date = time.strftime(date_format, time.gmtime(item['orig_posted']))
            escaped_title = cgi.escape(item['title']).encode('ascii', 'xmlcharrefreplace')
            escaped_author = cgi.escape(item['author']).encode('ascii', 'xmlcharrefreplace')
            # The link is element text too: an unescaped '&' in a query
            # string would make the feed invalid XML.
            escaped_link = cgi.escape(item['link'])
            f.write("<item><title>%s</title><pubDate>%s</pubDate><link>%s</link><guid isPermaLink=\"false\">%s</guid><description><![CDATA[By: %s]]></description></item>\n" % \
                     (escaped_title, pub_date, escaped_link, escaped_link, escaped_author))
            count += 1
            if count >= max_feed_items:
                break
        f.write("</channel></rss>")
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

503) 
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

504) 
505) if __name__=='__main__':
506)     start_time = time.time()
507)     progress_text = []
508) 
509)     old_stdout = sys.stdout
510)     old_stderr = sys.stderr
David Blume Little cleanup, div to rese...

David Blume authored 5 years ago

511)     sys.stdout = sys.stderr = cStringIO.StringIO()
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

512) 
513)     try:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

514)         localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

515)         #
516)         # Read in techcrunch.yaml
517)         #
518)         # [ { 'title'               : 'Title Text',
519)         #     'link'                : u'http://techcrunch.com/2010/08/17/google-buzz-who-to-follow/',
520)         #     'author'              : u'MG Siegler',
521)         #     'orig_posted'         : 1282197199
522)         #     'tags'                : [ u'Google', u'privacy' ]
523)         #     'qualified'           : -1
524)         #     'comment_times'       : [ 1282197199, 1282197407 ]
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

525)         #     'fb_comments'         : [ 0, 5 ]
526)         #     'fb_shares'           : [ 0, 300 ]
527)         #     'fb_likes'            : [ 0, 19 ]
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

528)         #     'slash_comment_times' : [ 1282197199, 1282197407 ]
529)         #     'slash_comments'      : [ 0, 5 ]
530)         #    },
531)         #    { ... }
532)         #  ]
533)         #
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

534)         yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
535)         if os.path.exists(yaml_fullpath):
536)             with open(yaml_fullpath, 'rb') as f:
537)                 items = yaml.load(f)
David Blume Catch up to production agai...

David Blume authored 6 years ago

538)                 if items is None:
539)                     print yaml_fullpath, "exists, but was empty."
540)                     items = []
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

541) 
542)                 # Do any dictionary item updating that might be necessary
543) #                for item in items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

544) #                    if not item.has_key('fb_shares'):
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

545) #                        item['fb_shares'] = []
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

546)         else:
547)             print "could not open", yaml_fullpath
548)             items = []
549) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

550)         with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f:
dblume New feed URL, added comment...

dblume authored 2 years ago

551) 	    json_obj = json.load(f)
552) 	    facebook_token = json_obj['access_token']
David Blume Catch up to production agai...

David Blume authored 6 years ago

553) 
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

554)         progress_text = ["read techcrunch.yaml"]
555)         process_feed(items)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

556) 
557)         #
558)         # If any work was done, then write files.
559)         #
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

560)         if any_entry_added:
David Blume Better conformance to PEP-8...

David Blume authored 6 years ago

561)             weekend_stats, weekday_stats = analysis.process_feed(items, rhs_metric, rhs_metric_times)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

562) 
David Blume Fix comment, and update a URL.

David Blume authored 4 years ago

563)             # We'll only look at the stats up to 2 hours after posting.
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

564)             weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
565)             weekend_threshold = weekend_mean + weekend_sigma
566)             weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
567)             weekday_threshold = weekday_mean + weekday_sigma
568)             for item in items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

569)                 wday = time.localtime(item['orig_posted']).tm_wday
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

570)                 if wday == 5 or wday == 6:
571)                     threshold = weekend_threshold
572)                 else:
573)                     threshold = weekday_threshold
574)                 if item['qualified'] == -1:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

575)                     for i in range(len(item[rhs_metric_times])):
576)                         r_time = item[rhs_metric_times][i]
David Blume Fix comment, and update a URL.

David Blume authored 4 years ago

577)                         if r_time - item['orig_posted'] < 7200:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

578)                             if item[rhs_metric][i] >= threshold:
dblume New feed URL, added comment...

dblume authored 2 years ago

579)                                 # Comment out when graph.facebook.com engagement returns only 0s.
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

580)                                 item['qualified'] = threshold
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

581)                                 break
David Blume Fix comment, and update a URL.

David Blume authored 4 years ago

582)                         else:
583)                             break
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

584) 
585)             # Automatically add those items whose authors and tags I like
586)             for item in items:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

587)                 if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0:
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

588)                     if item['author'].lower() in authors_to_post:
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

589)                         item['qualified'] = threshold
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

590)                     elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0:
David Blume 2011-02-04: Update to the c...

David Blume authored 6 years ago

591)                         item['qualified'] = threshold
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

592) 
593)             #
594)             # Write out the updated yaml file.
595)             #
David Blume 2011-02-04: Algorithm chang...

David Blume authored 6 years ago

596) 
597)             # For the one file we really use, write to a file on the side, then move it.
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

598)             yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
599)             with open(yaml_newfile_fullpath, 'wb') as f:
600)                 yaml.dump(items, f, width=120)
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

601)             try:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

602)                 os.rename(yaml_newfile_fullpath, yaml_fullpath)
David Blume 2015-11-23: Resync svn with...

David Blume authored 6 years ago

603)             except OSError as e:
604)                 print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

605)             with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
606)                 yaml.dump(items, f, width=120)
607)             with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
608)                 yaml.dump(items, f, encoding='utf-8', width=120)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

609) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

610)             make_feed_file(items)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

611) 
David Blume Moved SMTP credentials to s...

David Blume authored 6 years ago

612)             make_index_html(items, weekend_stats, weekday_stats)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

613)         else:
614)             print "No entries were added this time."
615) 
David Blume Catch up to production agai...

David Blume authored 6 years ago

616)     except Exception as e:
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

617)         exceptional_text = "An exception occurred: " + str(e.__class__) + " " + str(e)
618)         print exceptional_text, ' '.join(progress_text)
619)         traceback.print_exc(file=sys.stdout)
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

620)         try:
David Blume Replace hardcoded string wi...

David Blume authored 6 years ago

621)             send_email('Exception thrown in ' + os.path.basename(__file__),
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

622)                       exceptional_text + "\n" + traceback.format_exc(),
David Blume Replace hardcoded string wi...

David Blume authored 6 years ago

623)                       (smtp_creds.default_recipient,))
David Blume Catch up to production agai...

David Blume authored 6 years ago

624)         except Exception as e:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

625)             print "Could not send email to notify you of the exception. :("
626) 
627)     message = sys.stdout.getvalue()
628)     sys.stdout = old_stdout
629)     sys.stderr = old_stderr
630)     if not debug:
631)         print message
632) 
633)     # Finally, let's save this to a statistics page
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

634)     if os.path.exists(os.path.join(localdir, 'stats.txt')):
635)         with open(os.path.join(localdir, 'stats.txt')) as f:
David Blume Original 2010-09-03 version

David Blume authored 6 years ago

636)             lines = f.readlines()
637)     else:
638)         lines = []
David Blume Catch up to production agai...

David Blume authored 6 years ago

639)     lines = lines[:672] # Just keep the past week's worth
David Blume 2015-11-27: Remove obsolete...

David Blume authored 6 years ago

640)     # status = len(message.strip()) and message.strip().replace('\n', ' - ') or "OK"
641)     status = len(message.strip()) and '\n                       '.join( message.splitlines()) or "OK"
642)     lines.insert(0, "%s %3.0fs %s\n" % (time.strftime('%Y-%m-%d, %H:%M', time.localtime()), time.time() - start_time, status))
643)     with open(os.path.join(localdir,'stats.txt' ), 'w') as f:
644)         f.writelines(lines)