#!/usr/bin/env python3
# From https://pybay.com/site_media/slides/raymond2017-keynote/process.html
'''Fetch a list of web sites concurrently and print the size of each page.'''

import os
import urllib.request
import urllib.error
from multiprocessing.pool import ThreadPool
from typing import List, Tuple, Union

# URLs to measure; commented-out entries are kept as ready-made alternatives.
sites: List[str] = [
    # 'https://www.yahoo.com/',
    # 'http://www.cnn.com',
    # 'http://www.python.org',
    # 'http://www.jython.org',
    'http://www.pypy.org',
    'http://www.perl.org',
    # 'http://www.cisco.com',
    # 'http://www.facebook.com',
    # 'http://www.twitter.com',
    # 'http://www.macrumors.com/',
    # 'http://arstechnica.com/',
    # 'http://www.reuters.com/',
    # 'http://abcnews.go.com/',
    # 'http://www.cnbc.com/',
]


def sitesize(url: str) -> Tuple[str, Union[int, str]]:
    '''Determine the size of a website.

    Args:
        url: Address to fetch with ``urllib.request.urlopen``.

    Returns:
        ``(url, size)`` where ``size`` is the number of bytes read from the
        response, or ``(url, message)`` where ``message`` is the string form
        of the ``URLError``/``HTTPError`` raised while fetching.
    '''
    try:
        with urllib.request.urlopen(url) as f:
            page = f.read()
    except urllib.error.URLError as e:
        # HTTPError is a subclass of URLError, so this one handler covers
        # both failure kinds and reports them the same way.
        return url, str(e)
    return url, len(page)


def run() -> None:
    '''Fetch every entry of ``sites`` in a thread pool and print each result.

    Results are printed as ``(url, size_or_error)`` tuples in completion
    order, which is not necessarily the order of ``sites``.
    '''
    # Network bound: Use max(); CPU bound: Use min()
    # os.cpu_count() may return None, so fall back to 1; this also keeps the
    # pool size positive when ``sites`` is empty.
    workers = max(len(sites), os.cpu_count() or 1)
    # The context manager shuts the worker threads down once all results
    # have been consumed, instead of leaking the pool.
    with ThreadPool(workers) as pool:
        for result in pool.imap_unordered(sitesize, sites):
            print(result)


if __name__ == '__main__':
    run()