Add a tip about setting the number of threads in ThreadPool.
David Blume

David Blume committed on 2020-12-30 12:02:16
Showing 2 changed files, with 7 additions and 3 deletions.

... ...
@@ -1,5 +1,6 @@
1 1
 #!/usr/bin/env python3
2 2
 # From https://pybay.com/site_media/slides/raymond2017-keynote/process.html
3
+import os
3 4
 import urllib.request
4 5
 import urllib.error
5 6
 from multiprocessing.pool import ThreadPool
... ...
@@ -30,10 +31,13 @@ def sitesize(url: str):
30 31
         return url, len(page)
31 32
     except urllib.error.HTTPError as e:
32 33
         return url, str(e)
34
+    except urllib.error.URLError as e:
35
+        return url, "On macOS? Try another python3. " + str(e)
33 36
 
34 37
 
35 38
 def run():
36
-    pool = ThreadPool(2)
39
+    # Network bound: Use max(); CPU bound: Use min()
40
+    pool = ThreadPool(max(len(sites), os.cpu_count()))
37 41
     for result in pool.imap_unordered(sitesize, sites):
38 42
         print(result)
39 43
 
... ...
@@ -35,12 +35,12 @@ def set_v_print(verbose: bool):
35 35
 @timeit
36 36
 def main(debug: bool):
37 37
     start_time = time.time()
38
-    localdir = os.path.abspath(os.path.dirname(sys.argv[0]))
38
+    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
39 39
     v_print("Running counter...")
40 40
     counter.counter.run()
41 41
     v_print("Running sitesize...")
42 42
     sitesize.sitesize.run()
43
-    print(f'Done in {localdir}.')
43
+    print(f'{sys.argv[0]} is in {script_dir}.')
44 44
 
45 45
 
46 46
 if __name__ == '__main__':
47 47