Upgrade yaml module (88479ef) - techcrunch.git

techcrunch.py

@@ -4,8 +4,10 @@
 # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
 # cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
 import feedparser
-import yaml
 import sys
+if sys.path[0] != '':
+    sys.path.insert(0, '')  # DXB Only needed for cronjobs to find yaml
+import yaml
 import os
 import time
 import codecs
@@ -31,6 +33,9 @@ any_entry_added = False
 tags_to_post = {'apple', 'google', 'roku'}
 authors_to_post = ['michael arrington',]
 
+# 2022-02-24: With a threshold of just mean + sigma, too many posts qualified; scale sigma by this factor.
+threshold_sigma_factor = 1.5
+
 # TODO 2018-01-18: Maybe combine fb_likes with bf_shares or something...
 rhs_metric = 'fb_likes'
 rhs_metric_times = 'comment_times'
@@ -206,7 +211,8 @@ def process_feed(yaml_items):
     """Retrieve the url and process it.
     feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
     """
-    feed = feedparser.parse('https://techcrunch.com/feed/')
+    #feed = feedparser.parse('https://techcrunch.com/feed/')
+    feed = feedparser.parse('https://pi.dlma.com/techcrunch_feed.php')  # DXB temporary until removed from denylist
     if hasattr(feed, 'status'):
         if feed.status == 304:
             pass
@@ -460,9 +466,9 @@ def make_index_html(yaml_items, weekend_stats, weekday_stats):
     with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
         f.write(html_head % (even_background, odd_background, img_width, chart_io.getvalue()))
         chart_io.close()
-        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
-        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2]))
-        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2]))
+        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold (mean + sigma * %1.1f)</th></tr>\n' % threshold_sigma_factor)
+        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] * threshold_sigma_factor))
+        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2] * threshold_sigma_factor))
         f.write('</table></div>\n<br />\n')
         f.write('<div align="center">\n<table>\n')
         for image_index, image in enumerate(yaml_items[:40]):
@@ -534,7 +540,7 @@ if __name__=='__main__':
         yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
         if os.path.exists(yaml_fullpath):
             with open(yaml_fullpath, 'rb') as f:
-                items = yaml.load(f)
+                items = yaml.load(f, Loader=yaml.Loader)
                 if items is None:
                     print yaml_fullpath, "exists, but was empty."
                     items = []
@@ -562,9 +568,9 @@ if __name__=='__main__':
 
             # We'll only look at the stats up to 2 hours after posting.
             weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
-            weekend_threshold = weekend_mean + weekend_sigma
+            weekend_threshold = weekend_mean + weekend_sigma * threshold_sigma_factor
             weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
-            weekday_threshold = weekday_mean + weekday_sigma
+            weekday_threshold = weekday_mean + weekday_sigma * threshold_sigma_factor
             for item in items:
                 wday = time.localtime(item['orig_posted']).tm_wday
                 if wday == 5 or wday == 6:
@@ -597,15 +603,15 @@ if __name__=='__main__':
             # For the one file we really use, write to a file on the side, then move it.
             yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
             with open(yaml_newfile_fullpath, 'wb') as f:
-                yaml.dump(items, f, width=120)
+                yaml.dump(items, f, default_flow_style=None, width=120)
             try:
                 os.rename(yaml_newfile_fullpath, yaml_fullpath)
             except OSError as e:
                 print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
             with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
-                yaml.dump(items, f, width=120)
+                yaml.dump(items, f, default_flow_style=None, width=120)
             with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
-                yaml.dump(items, f, encoding='utf-8', width=120)
+                yaml.dump(items, f, default_flow_style=None, encoding='utf-8', width=120)
 
             make_feed_file(items)
 


...	...	@@ -4,8 +4,10 @@
4	4	# cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
5	5	# cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
6	6	import feedparser
7		-import yaml
8	7	import sys
	8	+if sys.path[0] != '':
	9	+ sys.path.insert(0, '') # DXB Only needed for cronjobs to find yaml
	10	+import yaml
9	11	import os
10	12	import time
11	13	import codecs
...	...	@@ -31,6 +33,9 @@ any_entry_added = False
31	33	tags_to_post = {'apple', 'google', 'roku'}
32	34	authors_to_post = ['michael arrington',]
33	35
	36	+# 2022-02-24: With a threshold of just mean + sigma, too many posts qualified; scale sigma by this factor.
	37	+threshold_sigma_factor = 1.5
	38	+
34	39	# TODO 2018-01-18: Maybe combine fb_likes with bf_shares or something...
35	40	rhs_metric = 'fb_likes'
36	41	rhs_metric_times = 'comment_times'
...	...	@@ -206,7 +211,8 @@ def process_feed(yaml_items):
206	211	"""Retrieve the url and process it.
207	212	feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
208	213	"""
209		- feed = feedparser.parse('https://techcrunch.com/feed/')
	214	+ #feed = feedparser.parse('https://techcrunch.com/feed/')
	215	+ feed = feedparser.parse('https://pi.dlma.com/techcrunch_feed.php') # DXB temporary until removed from denylist
210	216	if hasattr(feed, 'status'):
211	217	if feed.status == 304:
212	218	pass
...	...	@@ -460,9 +466,9 @@ def make_index_html(yaml_items, weekend_stats, weekday_stats):
460	466	with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
461	467	f.write(html_head % (even_background, odd_background, img_width, chart_io.getvalue()))
462	468	chart_io.close()
463		- f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
464		- f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2]))
465		- f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2]))
	469	+ f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold (mean + sigma * %1.1f)</th></tr>\n' % threshold_sigma_factor)
	470	+ f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] * threshold_sigma_factor))
	471	+ f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2] * threshold_sigma_factor))
466	472	f.write('</table></div>\n<br />\n')
467	473	f.write('<div align="center">\n<table>\n')
468	474	for image_index, image in enumerate(yaml_items[:40]):
...	...	@@ -534,7 +540,7 @@ if __name__=='__main__':
534	540	yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
535	541	if os.path.exists(yaml_fullpath):
536	542	with open(yaml_fullpath, 'rb') as f:
537		- items = yaml.load(f)
	543	+ items = yaml.load(f, Loader=yaml.Loader)
538	544	if items is None:
539	545	print yaml_fullpath, "exists, but was empty."
540	546	items = []
...	...	@@ -562,9 +568,9 @@ if __name__=='__main__':
562	568
563	569	# We'll only look at the stats up to 2 hours after posting.
564	570	weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
565		- weekend_threshold = weekend_mean + weekend_sigma
	571	+ weekend_threshold = weekend_mean + weekend_sigma * threshold_sigma_factor
566	572	weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
567		- weekday_threshold = weekday_mean + weekday_sigma
	573	+ weekday_threshold = weekday_mean + weekday_sigma * threshold_sigma_factor
568	574	for item in items:
569	575	wday = time.localtime(item['orig_posted']).tm_wday
570	576	if wday == 5 or wday == 6:
...	...	@@ -597,15 +603,15 @@ if __name__=='__main__':
597	603	# For the one file we really use, write to a file on the side, then move it.
598	604	yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
599	605	with open(yaml_newfile_fullpath, 'wb') as f:
600		- yaml.dump(items, f, width=120)
	606	+ yaml.dump(items, f, default_flow_style=None, width=120)
601	607	try:
602	608	os.rename(yaml_newfile_fullpath, yaml_fullpath)
603	609	except OSError as e:
604	610	print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
605	611	with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
606		- yaml.dump(items, f, width=120)
	612	+ yaml.dump(items, f, default_flow_style=None, width=120)
607	613	with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
608		- yaml.dump(items, f, encoding='utf-8', width=120)
	614	+ yaml.dump(items, f, default_flow_style=None, encoding='utf-8', width=120)
609	615
610	616	make_feed_file(items)
611	617
612	618