TechCrunch Feed Filter

	Median	Mean	Std. Dev	Threshold (mean + sigma * %1.1f)
Weekday	%1.1f	%1.1f	%1.1f	%1.1f
Weekend	%1.1f	%1.1f	%1.1f	%1.1f

Median

Mean

Std. Dev

Threshold (mean + sigma * %1.1f)

Weekday

%1.1f

Weekend

%1.1f

\n\n') for image_index, image in enumerate(yaml_items[:40]): f.write('\n \n' % \ (image_index % 2 and "even" or "odd", image['link'], image['title'], time.strftime("%H:%M", time.localtime(image['orig_posted'])), image['author'], ) ) f.write(' \n' % (image_index, )) f.write(html_footer) if os.path.exists(index_fullpath): os.unlink(index_fullpath) shutil.move(new_index_fullpath, index_fullpath) def make_feed_file(yaml_items): """Writes the RSS feed file with the YAML items.""" with codecs.open(os.path.join(localdir, 'rss_feed.xml'), 'w', 'utf-8') as f: f.write("\n\n\n\nTrending at TechCrunchhttp://techcrunch.dlma.com") f.write("%sAutomatically Generated Feeden-us\n" % (time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))) count = 0 for item in yaml_items: now = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(item['orig_posted'])) if item['qualified'] != -1: escaped_title = html.escape(item['title']) escaped_author = html.escape(item['author']) f.write("%s%s%s%s\n" % \ (escaped_title, now, item['link'], item['link'], escaped_author)) count += 1 if count > 14: break f.write("") if __name__=='__main__': start_time = time.time() progress_text = [] old_stdout = sys.stdout old_stderr = sys.stderr sys.stdout = sys.stderr = io.StringIO() try: localdir = os.path.abspath(os.path.dirname(sys.argv[0])) # # Read in techcrunch.yaml # # [ { 'title' : 'Title Text', # 'link' : u'http://techcrunch.com/2010/08/17/google-buzz-who-to-follow/', # 'author' : u'MG Siegler', # 'orig_posted' : 1282197199 # 'tags' : [ u'Google', u'privacy' ] # 'qualified' : -1 # 'comment_times' : [ 1282197199, 1282197407 ] # 'fb_comments' : [ 0, 5 ] # 'fb_shares' : [ 0, 300 ] # 'fb_likes' : [ 0, 19 ] # 'slash_comment_times' : [ 1282197199, 1282197407 ] # 'slash_comments' : [ 0, 5 ] # }, # { ... } # ] # yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml') if os.path.exists(yaml_fullpath): with open(yaml_fullpath, 'r') as f: items = yaml.load(f, Loader=yaml.Loader) if items is None: print(yaml_fullpath, "exists, but was empty.") items = [] # Do any dictionary item updating that might be necessary # for item in items: # item['link'] = str(item['link']) # if not item.has_key('fb_shares'): # item['fb_shares'] = [] else: print("could not open", yaml_fullpath) items = [] with open(os.path.join(localdir, 'facebook-token.txt'), 'r') as f: json_obj = json.load(f) facebook_token = json_obj['access_token'] progress_text = ["read techcrunch.yaml"] process_feed(items) # # If any work was done, then write files. # if any_entry_added: weekend_stats, weekday_stats = analysis.process_feed(items, rhs_metric, rhs_metric_times) # We'll only look at the stats up to 2 hours after posting. weekend_median, weekend_mean, weekend_sigma = weekend_stats[2] weekend_threshold = weekend_mean + weekend_sigma * threshold_sigma_factor weekday_median, weekday_mean, weekday_sigma = weekday_stats[2] weekday_threshold = weekday_mean + weekday_sigma * threshold_sigma_factor for item in items: wday = time.localtime(item['orig_posted']).tm_wday if wday == 5 or wday == 6: threshold = weekend_threshold else: threshold = weekday_threshold if item['qualified'] == -1: for i in range(len(item[rhs_metric_times])): r_time = item[rhs_metric_times][i] if r_time - item['orig_posted'] < 7200: if item[rhs_metric][i] >= threshold: # Comment out when graph.facebook.com engagement returns only 0s. item['qualified'] = threshold break else: break # Automatically add those items whose authors and tags I like for item in items: if item['qualified'] == -1 and len(item[rhs_metric_times]) > 0: if item['author'].lower() in authors_to_post: item['qualified'] = threshold elif len(set([j.lower() for j in item['tags']]) & tags_to_post) > 0: item['qualified'] = threshold # # Write out the updated yaml file. # # For the one file we really use, write to a file on the side, then move it. yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml') with open(yaml_newfile_fullpath, 'w') as f: yaml.dump(items, f, default_flow_style=None, width=120) try: os.rename(yaml_newfile_fullpath, yaml_fullpath) except OSError as e: print("The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)) with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f: yaml.dump(items, f, default_flow_style=None, width=120) with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f: yaml.dump(items, f, default_flow_style=None, encoding='utf-8', width=120) make_feed_file(items) make_index_html(items, weekend_stats, weekday_stats) else: print("No entries were added this time.") except Exception as e: exceptional_text = "An exception occurred: " + str(e.__class__) + " " + str(e) print(exceptional_text, ' '.join(progress_text)) traceback.print_exc(file=sys.stdout) try: send_email('Exception thrown in ' + os.path.basename(__file__), exceptional_text + "\n" + traceback.format_exc(), (smtp_creds.default_recipient,)) except Exception as e: print("Could not send email to notify you of the exception. :(") message = sys.stdout.getvalue() sys.stdout = old_stdout sys.stderr = old_stderr if not debug: print(message) # Finally, let's save this to a statistics page if os.path.exists(os.path.join(localdir, 'stats.txt')): with open(os.path.join(localdir, 'stats.txt')) as f: lines = f.readlines() else: lines = [] lines = lines[:672] # Just keep the past week's worth # status = len(message.strip()) and message.strip().replace('\n', ' - ') or "OK" status = len(message.strip()) and '\n '.join( message.splitlines()) or "OK" lines.insert(0, "%s %3.0fs %s\n" % (time.strftime('%Y-%m-%d, %H:%M', time.localtime()), time.time() - start_time, status)) with open(os.path.join(localdir,'stats.txt' ), 'w') as f: f.writelines(lines)

%s at %s by %s

\n' % (image['qualified'] != -1 and '

' or '')) f.write('