dblume committed on 2024-07-25 21:28:34
Showing 1 changed file with 17 additions and 11 deletions.
@@ -4,8 +4,10 @@
 # cp techcrunch.yaml techcrunch.yaml_back; ./techcrunch.py; \
 # cp techcrunch.yaml techcrunch_tmp.yaml; cp techcrunch.yaml_back techcrunch.yaml
 import feedparser
-import yaml
 import sys
+if sys.path[0] != '':
+    sys.path.insert(0, '') # DXB Only needed for cronjobs to find yaml
+import yaml
 import os
 import time
 import codecs
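
Note on the import reshuffle above: when the script runs under cron, the process can start in a way that leaves the launch directory off sys.path, so a yaml package sitting next to the script is not importable; prepending '' makes imports resolve against the current directory again. A minimal sketch of the same guard in isolation (it assumes PyYAML is present, as the script requires anyway):

    import sys

    # Mirror the guard added in this commit: make sure the current working
    # directory ('') is importable before anything tries to import yaml.
    if sys.path[0] != '':
        sys.path.insert(0, '')

    import yaml  # now found even when cron starts us without '' on sys.path
    print(sys.path[0])  # prints '' once the guard has run
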
@@ -31,6 +33,9 @@ any_entry_added = False
 tags_to_post = {'apple', 'google', 'roku'}
 authors_to_post = ['michael arrington',]
 
+# 2022-02-24: Seeing too many posts with just mean + sigma, adjust with factor
+threshold_sigma_factor = 1.5
+
 # TODO 2018-01-18: Maybe combine fb_likes with bf_shares or something...
 rhs_metric = 'fb_likes'
 rhs_metric_times = 'comment_times'
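
The new threshold_sigma_factor raises the posting cutoff from mean + sigma to mean + 1.5 * sigma, as the later hunks show. A quick illustration with made-up like counts (the real mean and sigma come from the weekday/weekend stats the script computes from its own history):

    # Hypothetical data, for illustration only.
    likes = [2, 4, 6, 8, 10, 12, 17, 22]
    mean = sum(likes) / float(len(likes))                               # 10.125
    sigma = (sum((x - mean) ** 2 for x in likes) / len(likes)) ** 0.5   # ~6.29

    threshold_sigma_factor = 1.5
    old_threshold = mean + sigma                           # ~16.4: 17 and 22 pass
    new_threshold = mean + sigma * threshold_sigma_factor  # ~19.6: only 22 passes
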
@@ -206,7 +211,8 @@ def process_feed(yaml_items):
     """Retrieve the url and process it.
     feed_info (in, out) A tuple that describes an individual feed, like its name and etag.
     """
-    feed = feedparser.parse('https://techcrunch.com/feed/')
+    #feed = feedparser.parse('https://techcrunch.com/feed/')
+    feed = feedparser.parse('https://pi.dlma.com/techcrunch_feed.php') # DXB temporary until removed from denylist
     if hasattr(feed, 'status'):
         if feed.status == 304:
             pass
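
The swapped-in URL is a stand-in feed served through pi.dlma.com; the surrounding context keeps checking feed.status, including 304. For reference, a sketch of the conditional-GET pattern that a 304 check implies with feedparser (not the script's exact flow; the second fetch here exists only to show the etag/modified round trip):

    import feedparser

    url = 'https://pi.dlma.com/techcrunch_feed.php'

    first = feedparser.parse(url)
    etag = getattr(first, 'etag', None)          # present only if the server sent one
    modified = getattr(first, 'modified', None)  # Last-Modified, if sent

    # Passing the saved validators back lets the server answer 304 Not Modified,
    # meaning there is nothing new to process.
    second = feedparser.parse(url, etag=etag, modified=modified)
    if getattr(second, 'status', None) == 304:
        print('Feed unchanged since the last poll.')
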
@@ -460,9 +466,9 @@ def make_index_html(yaml_items, weekend_stats, weekday_stats):
     with codecs.open(new_index_fullpath, 'w', 'utf-8') as f:
         f.write(html_head % (even_background, odd_background, img_width, chart_io.getvalue()))
         chart_io.close()
-        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold</th></tr>\n')
-        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2]))
-        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2]))
+        f.write('<div align="center">\n<table class="legend">\n<tr><th></th><th>Median</th><th>Mean</th><th>Std. Dev</th><th>Threshold (mean + sigma * %1.1f)</th></tr>\n' % threshold_sigma_factor)
+        f.write('<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekday_stats[2][0], weekday_stats[2][1], weekday_stats[2][2], weekday_stats[2][1] + weekday_stats[2][2] * threshold_sigma_factor))
+        f.write('<tr><th>Weekend</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (weekend_stats[2][0], weekend_stats[2][1], weekend_stats[2][2], weekend_stats[2][1] + weekend_stats[2][2] * threshold_sigma_factor))
         f.write('</table></div>\n<br />\n')
         f.write('<div align="center">\n<table>\n')
         for image_index, image in enumerate(yaml_items[:40]):
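
The legend rows index weekday_stats[2] and weekend_stats[2], which a later hunk unpacks as (median, mean, sigma). A small sketch of how one row and its new Threshold cell are formed, using made-up stats:

    # Made-up (median, mean, sigma) standing in for weekday_stats[2].
    threshold_sigma_factor = 1.5
    median, mean, sigma = 4.0, 5.5, 3.0

    row = '<tr><th>Weekday</th><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td><td>%1.1f</td></tr>\n' % (
        median, mean, sigma, mean + sigma * threshold_sigma_factor)
    print(row)  # the last cell is 10.0, i.e. 5.5 + 3.0 * 1.5
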
@@ -534,7 +540,7 @@ if __name__=='__main__':
     yaml_fullpath = os.path.join(localdir, 'techcrunch.yaml')
     if os.path.exists(yaml_fullpath):
         with open(yaml_fullpath, 'rb') as f:
-            items = yaml.load(f)
+            items = yaml.load(f, Loader=yaml.Loader)
         if items is None:
             print yaml_fullpath, "exists, but was empty."
             items = []
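
Background on the Loader change: since PyYAML 5.1, calling yaml.load() without an explicit Loader emits a YAMLLoadWarning, and PyYAML 6 makes the argument required, so Loader=yaml.Loader keeps the old behavior. A short sketch (the document string is made up; safe_load is the stricter option, and the full Loader is presumably fine here because the file is written by the script itself):

    import yaml

    doc = 'title: example\nfb_likes: 12\n'  # stand-in for techcrunch.yaml content

    # Explicit Loader: same behavior as the old bare yaml.load(f).
    items = yaml.load(doc, Loader=yaml.Loader)

    # safe_load refuses arbitrary Python object tags; for plain mappings
    # like this one the two agree.
    print(items == yaml.safe_load(doc))  # True
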
@@ -562,9 +568,9 @@ if __name__=='__main__':
 
     # We'll only look at the stats up to 2 hours after posting.
    weekend_median, weekend_mean, weekend_sigma = weekend_stats[2]
-    weekend_threshold = weekend_mean + weekend_sigma
+    weekend_threshold = weekend_mean + weekend_sigma * threshold_sigma_factor
     weekday_median, weekday_mean, weekday_sigma = weekday_stats[2]
-    weekday_threshold = weekday_mean + weekday_sigma
+    weekday_threshold = weekday_mean + weekday_sigma * threshold_sigma_factor
     for item in items:
         wday = time.localtime(item['orig_posted']).tm_wday
         if wday == 5 or wday == 6:
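
In a time.struct_time, tm_wday runs from 0 (Monday) to 6 (Sunday), so the wday == 5 or wday == 6 test routes an item to the weekend threshold and everything else to the weekday one. A tiny sketch of that split (the helper name is mine, not the script's):

    import time

    def is_weekend(posted_epoch):
        # 5 = Saturday, 6 = Sunday in time.struct_time
        wday = time.localtime(posted_epoch).tm_wday
        return wday == 5 or wday == 6

    print(is_weekend(time.time()))  # True only on Saturday or Sunday
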
@@ -597,15 +603,15 @@ if __name__=='__main__':
     # For the one file we really use, write to a file on the side, then move it.
     yaml_newfile_fullpath = os.path.join(localdir, 'techcrunch_temp_writable.yaml')
     with open(yaml_newfile_fullpath, 'wb') as f:
-        yaml.dump(items, f, width=120)
+        yaml.dump(items, f, default_flow_style=None, width=120)
     try:
         os.rename(yaml_newfile_fullpath, yaml_fullpath)
     except OSError as e:
         print "The source file was", yaml_newfile_fullpath, "and exists =", os.path.isfile(yaml_newfile_fullpath)
     with open(os.path.join(localdir, 'techcrunch_text.yaml'), 'w') as f:
-        yaml.dump(items, f, width=120)
+        yaml.dump(items, f, default_flow_style=None, width=120)
     with codecs.open(os.path.join(localdir, 'techcrunch_unicode.yaml'), 'w', 'utf-8') as f:
-        yaml.dump(items, f, encoding='utf-8', width=120)
+        yaml.dump(items, f, default_flow_style=None, encoding='utf-8', width=120)
 
     make_feed_file(items)
 
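
On the default_flow_style=None additions: PyYAML 5.1 changed the dumper's default from None to False, which expands every list into block style. Passing None restores the older, more compact layout where leaf lists stay inline, keeping the dumped YAML close to what earlier runs produced. A sketch with a made-up item:

    import yaml

    item = {'title': 'example', 'fb_likes': [1, 3, 7], 'tags': ['apple', 'google']}

    # None: leaf lists stay in flow style, so the output is compact.
    print(yaml.dump(item, default_flow_style=None, width=120))
    #   fb_likes: [1, 3, 7]
    #   tags: [apple, google]
    #   title: example

    # False (the post-5.1 default): every list becomes a block sequence.
    print(yaml.dump(item, default_flow_style=False, width=120))
    #   fb_likes:
    #   - 1
    #   - 3
    #   - 7
    #   ...
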