"""
# JavaScript written at the start of every chart by write_chart_data():
# opens the arrayToDataTable() literal and declares its four columns
# (x value, 'Comments', 'Shares' and a per-point style string).
chart_data_header = """ var data = google.visualization.arrayToDataTable([
['', 'Comments', 'Shares', {'type': 'string', 'role': 'style'}],
"""
# JavaScript written after a chart's data rows: closes the DataTable literal,
# creates the LineChart bound to element 'chart<N>' and sets its background.
# Formatted by write_chart_data() with (image_index, bg_color).
chart_data_middle = """ ]);
var chart = new google.visualization.LineChart(document.getElementById('chart%d'));
options.backgroundColor = '#%s';
"""
# Image dimensions; img_width is substituted into html_head by
# make_index_html().  NOTE(review): presumably pixels -- confirm in html_head.
img_width = 240
img_height = 68
# Colors are six hex digits without the leading '#'.
# NOTE(review): the purpose of the next four is inferred from their names only.
series_1_color = "0000FF"
series_2_color = "00AA00"
threshold_color = "FF8C00"
tag_color = "F01000"
# Row background colors; write_chart_data() picks one per chart and
# make_index_html() passes both to html_head.
even_background = "F8F8F8"
odd_background = "E8E8E8"
# NOTE(review): presumably watermark colors matching the two backgrounds --
# confirm where they are used.
even_watermark = "E0E0FF"
odd_watermark = "D0D0F0"
def asciiize(s):
    """Return *s* encoded as ASCII when possible, otherwise *s* unchanged.

    s: The value to convert, normally a text string.

    Returns the result of s.encode('ascii').  When the value cannot be
    represented in ASCII, or has no encode() method at all, the original
    object is returned untouched, so the call never fails for those inputs.
    """
    try:
        return s.encode('ascii')
    except (UnicodeError, AttributeError):
        # UnicodeError covers UnicodeEncodeError and also the
        # UnicodeDecodeError that Python 2 raises when a non-ASCII byte
        # string is implicitly decoded before being encoded.
        # AttributeError covers values without encode() (None, numbers...).
        return s
def send_email(subject, message, toaddrs,
               fromaddr='"%s" <%s>' % (os.path.basename(__file__), smtp_creds.user)):
    """Send a plain-text email through the server described by smtp_creds.

    subject: Text of the Subject header.
    message: Text placed after the headers.
    toaddrs: Sequence of recipient addresses.  Used as the SMTP envelope
        recipients and joined with ", " to form the To header.
    fromaddr: Sender address.  Defaults to this script's file name as the
        display name and smtp_creds.user as the address.

    Errors raised by smtplib while connecting, logging in or sending
    propagate to the caller; the connection is closed in every case.
    """
    # NOTE(review): only one CRLF separates the Subject header from the
    # message, so the message is expected to supply the blank line that
    # ends the header section -- confirm against the callers.
    payload = "Content-Type: text/plain; charset=\"us-ascii\"\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\n%s" % \
        (fromaddr, ", ".join(toaddrs), subject, message)
    smtp = smtplib.SMTP(smtp_creds.server, port=smtp_creds.port)
    try:
        smtp.login(smtp_creds.user, smtp_creds.passw)
        smtp.sendmail(fromaddr, toaddrs, payload)
    finally:
        # Always end the session, even when login or sendmail failed.
        try:
            smtp.quit()
        except smtplib.SMTPServerDisconnected:
            # The connection is already gone; there is nothing left to
            # close and the original error (if any) must not be hidden.
            pass
def index_id(a_list, op, elem):
    """Return the index of the first item for which op(item, elem) is true.

    a_list: Iterable of items to search.
    op: Two-argument predicate called as op(item, elem), e.g. operator.ge.
    elem: Value passed as the second argument of op.

    Returns -1 when no item matches.  -1 is also returned when a_list
    cannot be iterated or op raises TypeError for the given operands.
    """
    try:
        matches = (index for index, item in enumerate(a_list) if op(item, elem))
        # next() with a default replaces the StopIteration that used to be
        # caught by a bare "except:", so unrelated errors are not hidden.
        return next(matches, -1)
    except TypeError:
        return -1
def write_chart_data(time_posted, lhs_times, lhs_values, rhs_times,
                     rhs_values, threshold_value, image_index, tag_hit, chart_io):
    """Write the JavaScript that builds and draws one chart to chart_io.

    time_posted, lhs_times, rhs_times: Kept for interface compatibility.
        The chart uses the sample index (0-7) as its x value, so these
        arguments are not read.
    lhs_values: Samples for the first data column ('Comments' in
        chart_data_header).
    rhs_values: Samples for the second data column ('Shares' in
        chart_data_header); also searched for the first sample that
        reaches threshold_value.
    threshold_value: Value written as the baseline of options.vAxes[1],
        or -1 when there is no threshold.
    image_index: Index of the chart; selects the 'chart<N>' element and
        the background color.
    tag_hit: When true the marker is drawn as a red diamond on the first
        point, and the baseline is always written.
    chart_io: File-like object; only its write() method is used.
    """
    # A series without samples is drawn as a single zero-valued point.
    if not len(lhs_values):
        lhs_values = [0]
    if not len(rhs_values):
        rhs_values = [0]

    marker_index = -1
    if threshold_value != -1:
        marker_index = index_id(rhs_values, operator.ge, threshold_value)
        if marker_index == -1 or tag_hit:
            # This can happen if threshold_value was set to a number
            # because the author or a tag was matched, but the article
            # was unpopular. We choose to put a marker at point index 0.
            marker_index = 0

    # Odd indexes get even_background and even indexes odd_background.
    if image_index % 2 != 0:
        bg_color = even_background
    else:
        bg_color = odd_background

    chart_io.write(chart_data_header)
    for i in range(8):
        if i != marker_index:
            style = "null"
        elif tag_hit:
            style = "'point { size: 5; fill-color: #FF0000; shape-type: diamond}'"
        else:
            style = "'point { size: 5; fill-color: #FF8C00; }'"
        # Samples that do not exist yet are emitted as JavaScript null.
        lhs_value = str(lhs_values[i]) if i < len(lhs_values) else "null"
        rhs_value = str(rhs_values[i]) if i < len(rhs_values) else "null"
        chart_io.write(" [%d, %s, %s, %s],\n" % (i, lhs_value, rhs_value, style))
    chart_io.write(chart_data_middle % (image_index, bg_color))

    if marker_index == -1 and not tag_hit:
        chart_io.write(" delete options.vAxes[1].baseline;\n")
    else:
        chart_io.write(" options.vAxes[1].baseline = %d;\n" % (threshold_value,))
    chart_io.write(" chart.draw(data, options);\n\n")
def process_feed(yaml_items):
    """Fetch the TechCrunch feed and update yaml_items from its entries.

    yaml_items (in, out) The list of stored item dicts.  When the server
        answered with any status other than 304, each feed entry is handed
        to process_item(), the list is cut back to at most 200 items and
        every remaining item is handed to process_yaml_item().

    Nothing is returned.  HTTP errors and feed parsing problems are
    reported with print.
    """
    #feed = feedparser.parse('https://techcrunch.com/feed/')
    feed = feedparser.parse('https://pi.dlma.com/techcrunch_feed.php') # DXB temporary until removed from denylist
    if hasattr(feed, 'status'):
        # The server answered; feed.status is the HTTP status code.
        if feed.status == 304:
            pass
        else:
            if feed.status != 200 and feed.status != 307 and feed.status != 301 and feed.status != 302:
                # Neither success nor a redirect: report the failure.
                if feed.status == 503:
                    print "the feed is temporarily unavailable."
                elif feed.status == 400:
                    print "the feed says we made a bad request."
                elif feed.status == 502:
                    print "the feed reported a bad gateway error."
                elif feed.status == 404:
                    print "the feed says the page was not found."
                elif feed.status == 500:
                    print "the feed had an internal server error."
                elif feed.status == 403:
                    print "Access to the feed was forbidden."
                else:
                    print "the feed returned feed.status %d." % ( feed.status, )
            else:
                # Save off this
                # The parsed feed is pickled next to the script unless the
                # XML parser reported a SAXParseException for it.
                if hasattr(feed, 'bozo_exception') and isinstance(feed.bozo_exception, xml.sax._exceptions.SAXParseException):
                    print "Didn't pickle TechCrunch feed because it had a bozo_exception: %s" % (str(feed.bozo_exception))
                else:
                    try:
                        with open(os.path.join(localdir, 'techcrunch_feed.pickle'), 'wb') as f:
                            pickle.dump(feed, f)
                    except(pickle.PicklingError, exceptions.TypeError) as e:
                        print "An error occurred while pickling the feed: %s." % \
                            (# str(e.__class__),
                             str(e))
                        traceback.print_exc(3, file=sys.stdout)
            # Entries are walked in reverse; process_item() inserts new
            # items at the front of yaml_items.
            for i in reversed(feed.entries):
                process_item(i, yaml_items)
            # If we have more than 200 items, remove the old ones.
            while len(yaml_items) > 200:
                yaml_items.pop()
            for i in yaml_items:
                # i['title'] = asciiize(i['title'])
                # i['tags'] = map(asciiize, i['tags'])
                process_yaml_item(i)
    else:
        # No HTTP status at all: the request itself failed.
        if hasattr(feed, 'bozo_exception'):
            e = feed.bozo_exception
            if isinstance(e, urllib2.URLError):
                # print_last_line stays True only when no reason was reported.
                print_last_line = True
                if hasattr(e, 'reason'):
                    # NOTE(review): 110, 111 and 104 look like the Linux errno
                    # values ETIMEDOUT, ECONNREFUSED and ECONNRESET -- confirm.
                    if e.reason[0] == 110:
                        print "the feed's connection timed out."
                        print_last_line = False
                    elif e.reason[0] == 111:
                        print "the feed's connection was refused."
                        print_last_line = False
                    elif e.reason[0] == 104:
                        print "the feed reset the connection."
                        print_last_line = False
                    else:
                        print "the feed had a URLError with reason %s." % (str(e.reason),)
                        print_last_line = False
                if print_last_line:
                    print "the feed had a URLError %s" % (str(e),)
            elif isinstance(e, httplib.BadStatusLine):
                print "the feed gave a bad status line. (%s)" % (str(e),)
            else:
                # Fall back to repr() when the exception has no message text.
                if len(str(e)):
                    print "the feed bozo_exception: %s \"%s\"" % (str(e.__class__), str(e))
                else:
                    print "the feed bozo_exception: %s %s" % (str(e.__class__), repr(e))
        else:
            print "the feed returned class %s, %s" % (str(feed.__class__), str(feed))
def process_item(feed_item, yaml_items):
    """Record one RSS feed entry in yaml_items.

    feed_item: A parsed feed entry.  Its link is required; issued_parsed,
        date_parsed, feedburner_origlink, author, title, tags and
        slash_comments are read when present.
    yaml_items (in, out) The list of stored item dicts.  When no stored
        item has the entry's link, a new item is inserted at index 0.

    For entries posted within the last 60 * 30 * 9 seconds, the current
    slash_comments count and the current time are appended to the item
    until it holds 8 samples.  Sets the global any_entry_added to True
    whenever an item is added or a sample is appended.
    """
    global any_entry_added
    timecode_now = int(time.time())

    # Prefer issued_parsed, then date_parsed.  An attribute that exists
    # but holds None is treated like a missing one, because
    # calendar.timegm() cannot convert None.
    date_parsed = getattr(feed_item, 'issued_parsed', None)
    if date_parsed is None:
        date_parsed = getattr(feed_item, 'date_parsed', None)
    if date_parsed is None:
        print("process_item found no timestamp for %s" % (asciiize(feed_item.link),))
        date_parsed = time.gmtime()
    timecode_parsed = calendar.timegm(date_parsed)

    link = feed_item.link
    if hasattr(feed_item, 'feedburner_origlink'):
        link = feed_item.feedburner_origlink
    # TODO 2018-01-18: Leave in the ncid for URL clicks, but remove during processing.
    # suffix_to_remove = '?ncid=rss'
    # if link.endswith(suffix_to_remove):
    #     link = link[:-len(suffix_to_remove)]

    # Look for an already stored item with the same link.
    yaml_item = next((item for item in yaml_items if link == item['link']), None)
    if yaml_item is None:
        author = ''
        if hasattr(feed_item, 'author'):
            author = asciiize(feed_item.author)
        tags = []
        if hasattr(feed_item, 'tags'):
            tags = [asciiize(tag.term) for tag in feed_item.tags]
        # Make a new yaml_item
        yaml_item = {'title': asciiize(feed_item.title),
                     'link': asciiize(link),
                     'author': author,
                     'tags': tags,
                     'orig_posted': timecode_parsed,
                     'qualified': -1,
                     'comment_times': [],
                     'fb_comments': [],
                     'fb_shares': [],
                     'fb_likes': [],
                     'slash_comment_times': [],
                     'slash_comments': [],
                     }
        yaml_items.insert(0, yaml_item)
        any_entry_added = True

    # Maybe check to ensure that this item isn't too old.
    if timecode_parsed < timecode_now - 60 * 30 * 9:
        return

    # Now, add the new values
    if hasattr(feed_item, 'slash_comments') and len(yaml_item['slash_comments']) < 8:
        any_entry_added = True
        yaml_item['slash_comment_times'].append(timecode_now)
        yaml_item['slash_comments'].append(int(feed_item.slash_comments))
def process_yaml_item(yaml_item):
    """Append one sample of Facebook statistics to a stored item.

    yaml_item (in, out) A stored item dict.  While it holds fewer than 8
        'fb_comments' samples, Get_fb_stats() is queried for its link
        (without a trailing '?ncid=rss') and, unless the comment count
        comes back as -1, the current time and the three counts are
        appended to 'comment_times', 'fb_shares', 'fb_comments' and
        'fb_likes'.

    Sets the global any_entry_added to True when a sample is appended.
    """
    global any_entry_added
    # Related to TODO 2018-01-18: Remove ncid only during processing.
    tracking_suffix = '?ncid=rss'
    link = yaml_item['link']
    # Maybe we should find() it instead, in case feedburner adds other options
    if link.endswith(tracking_suffix):
        link = link[:len(link) - len(tracking_suffix)]
    sampled_at = int(time.time())

    if len(yaml_item['fb_comments']) >= 8:
        return
    num_shares, num_comments, num_likes = Get_fb_stats(link)
    if num_comments == -1:
        return

    any_entry_added = True
    samples = (('comment_times', sampled_at),
               ('fb_shares', num_shares),
               ('fb_comments', num_comments),
               ('fb_likes', num_likes))
    for key, value in samples:
        yaml_item[key].append(value)
    # if len(yaml_item['reddit_']) < 8:
    #     num_ = get_reddit_stats(link)
    #     if num_ != -1:
    #         any_entry_added = True
    #         yaml_item['reddit_times'].append(timecode_now)
    #         yaml_item['reddit_'].append(num_)
def get_reddit_stats(url_string):
    """Placeholder for a Reddit lookup; always returns -1.

    url_string is accepted but not used.  A possible data source:
    curl "https://www.reddit.com/api/info.json?url=http://i.imgur.com/HG9dJ.jpg"
    """
    not_available = -1
    return not_available
def Get_fb_stats(url_string):
    """Use graph's "engagement" field to get reactions and shares.

    url_string: The article URL to look up.

    Returns a (shares, comments, likes) tuple:
      shares    'share_count', or 0 when the field is missing.
      comments  'comment_plugin_count' + 'comment_count', or 0 when either
                field is missing.
      likes     'reaction_count' (0 when missing) plus shares.
    All three values are -1 when the request fails, when the response is
    20 bytes or shorter, or when it cannot be parsed into an "engagement"
    object.  Failures are reported with print.
    """
    failed = (-1, -1, -1)
    url_string = url_string.encode('utf-8')
    try:
        encoded = urllib.urlencode({'access_token': facebook_token})
        url = 'https://graph.facebook.com/v2.11/?id=%s&fields=engagement&%s'
        f = urllib2.urlopen(url % (urllib.quote_plus(url_string), encoded))
        try:
            data = f.read()
        finally:
            # Release the connection even when read() fails.
            f.close()
    except (urllib2.URLError, httplib.BadStatusLine) as e:
        if hasattr(e, 'reason'):  # URLError
            if hasattr(e, 'code'):
                print("Get_fb_stats got an error (1): %s %s %s" % (e.code, e.reason, url_string))
            else:
                print("Get_fb_stats got an error (2): %s %s" % (e.reason, url_string))
        elif hasattr(e, 'code'):  # URLError
            print("Get_fb_stats got an error. Code: %s %s" % (e.code, url_string))
        else:
            print("Get_fb_stats got an error (3): %s" % (str(e),))
        return failed

    if len(data) <= 20:
        print("Get_fb_stats got too little data for  %s" % (url_string,))
        return failed

    # A body that is not JSON, or that lacks an "engagement" object, is
    # reported like any other failed lookup instead of aborting the run.
    try:
        d = json.loads(data)['engagement']
    except (ValueError, KeyError, TypeError):
        d = None
    if not isinstance(d, dict):
        print("Get_fb_stats got an unexpected response for %s" % (url_string,))
        return failed

    shares = d.get('share_count', 0)
    # TODO 2018-01-18: og_object metric was likes + shares + comments
    # Here we'll combine likes and shares, and comments with plugin_comments
    likes = d.get('reaction_count', 0) + shares
    try:
        comments = d['comment_plugin_count'] + d['comment_count']
    except KeyError:
        comments = 0
    return shares, comments, likes
def make_index_html(yaml_items, weekend_stats, weekday_stats):
"""Writes a static index.html file from the YAML items."""
cur_time = int(time.time())
new_index_fullpath = os.path.join(localdir, 'index.html_new')
index_fullpath = os.path.join(localdir, 'index.html')
chart_io = cStringIO.StringIO()
for image_index, image in enumerate(yaml_items[:40]):
tag_hit = False
if image['author'].lower() in authors_to_post:
tag_hit = True
elif len(set([j.lower() for j in image['tags']]) & tags_to_post) > 0:
tag_hit = True
write_chart_data(image['orig_posted'],
image['comment_times'],
image['fb_comments'],
image[rhs_metric_times],
image[rhs_metric],
image['qualified'],
image_index,
tag_hit,
chart_io
)
with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
f.write(html_head % (even_background, odd_background, img_width, chart_io.getvalue()))
chart_io.close()
f.write('