Upgrade yaml module
dblume

dblume committed on 2024-07-25 21:28:34
Showing 1 changed file, with 17 additions and 11 deletions.

... ...
@@ -4,8 +4,10 @@
4 4
 # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
5 5
 # cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
6 6
 import feedparser
7
-import yaml
8 7
 import sys
8
+if sys.path[0] != '':
9
+    sys.path.insert(0, '')  # DXB Only needed for cronjobs to find yaml
10
+import yaml
9 11
 import os
10 12
 import time
11 13
 import codecs
... ...
@@ -31,6 +33,9 @@ any_entry_added = False
31 33
 tags_to_post = {'apple', 'google', 'roku'}
32 34
 authors_to_post = ['michael arrington',]
33 35
 
36
+# 2022-02-24: Seeing too many posts with just mean + sigma, adjust with factor
37
+threshold_sigma_factor = 1.5
38
+
34 39
 # TODO 2018-01-18: Maybe combine fb_likes with bf_shares or something...
35 40
 rhs_metric = 'fb_likes'
36 41
 rhs_metric_times = 'comment_times'
... ...
@@ -206,7 +211,8 @@ def process_feed(yaml_items):
206 211
     """Retrieve the url and process it.
207 212
     feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
208 213
     """
209
-    feed = feedparser.parse('https://techcrunch.com/feed/')
214
+    #feed = feedparser.parse('https://techcrunch.com/feed/')
215
+    feed = feedparser.parse('https://pi.dlma.com/techcrunch_feed.php')  # DXB temporary until removed from denylist
210 216
     if hasattr(feed, 'status'):
211 217
         if feed.status == 304:
212 218
             pass
... ...
@@ -460,9 +466,9 @@ def make_index_html(yaml_items, weekend_stats, weekday_stats):
460 466
     with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
461 467
         f.write(html_head % (even_background, odd_background, img_width, chart_io.getvalue()))
462 468
         chart_io.close()
463
-        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
464
-        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2]))
465
-        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2]))
469
+        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold (mean + sigma * %1.1f)</th></tr>\n' % threshold_sigma_factor)
470
+        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] * threshold_sigma_factor))
471
+        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2] * threshold_sigma_factor))
466 472
         f.write('</table></div>\n<br />\n')
467 473
         f.write('<div align="center">\n<table>\n')
468 474
         for image_index, image in enumerate(yaml_items[:40]):
... ...
@@ -534,7 +540,7 @@ if __name__=='__main__':
534 540
         yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
535 541
         if os.path.exists(yaml_fullpath):
536 542
             with open(yaml_fullpath, 'rb') as f:
537
-                items = yaml.load(f)
543
+                items = yaml.load(f, Loader=yaml.Loader)
538 544
                 if items is None:
539 545
                     print yaml_fullpath, "exists, but was empty."
540 546
                     items = []
... ...
@@ -562,9 +568,9 @@ if __name__=='__main__':
562 568
 
563 569
             # We'll only look at the stats up to 2 hours after posting.
564 570
             weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
565
-            weekend_threshold = weekend_mean + weekend_sigma
571
+            weekend_threshold = weekend_mean + weekend_sigma * threshold_sigma_factor
566 572
             weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
567
-            weekday_threshold = weekday_mean + weekday_sigma
573
+            weekday_threshold = weekday_mean + weekday_sigma * threshold_sigma_factor
568 574
             for item in items:
569 575
                 wday = time.localtime(item['orig_posted']).tm_wday
570 576
                 if wday == 5 or wday == 6:
... ...
@@ -597,15 +603,15 @@ if __name__=='__main__':
597 603
             # For the one file we really use, write to a file on the side, then move it.
598 604
             yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
599 605
             with open(yaml_newfile_fullpath, 'wb') as f:
600
-                yaml.dump(items, f, width=120)
606
+                yaml.dump(items, f, default_flow_style=None, width=120)
601 607
             try:
602 608
                 os.rename(yaml_newfile_fullpath, yaml_fullpath)
603 609
             except OSError as e:
604 610
                 print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
605 611
             with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
606
-                yaml.dump(items, f, width=120)
612
+                yaml.dump(items, f, default_flow_style=None, width=120)
607 613
             with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
608
-                yaml.dump(items, f, encoding='utf-8', width=120)
614
+                yaml.dump(items, f, default_flow_style=None, encoding='utf-8', width=120)
609 615
 
610 616
             make_feed_file(items)
611 617
 
612 618