David Blume's GitList
Repositories
testpython.git
Code
Commits
Branches
Tags
Search
Tree:
43171df
Branches
Tags
main
python2
testpython.git
sitesize
sitesize.py
boredapi.com cert expired. Use http for now.
dblume
committed
43171df
at 2022-08-21 15:27:06
sitesize.py
Blame
History
Raw
#!/usr/bin/env python3
# From https://pybay.com/site_media/slides/raymond2017-keynote/process.html
"""Download every URL listed in ``sites`` concurrently and print its size."""
import os
import urllib.request
import urllib.error
from multiprocessing.pool import ThreadPool
from typing import List, Tuple, Union

# URLs to measure. Commented-out entries are kept as ready-made alternatives.
sites: List[str] = [
#   'https://www.yahoo.com/',
#   'http://www.cnn.com',
#   'http://www.python.org',
#   'http://www.jython.org',
    'https://www.pypy.org',
    'https://www.perl.org',
#   'http://www.cisco.com',
#   'http://www.facebook.com',
#   'http://www.twitter.com',
#   'http://www.macrumors.com/',
#   'http://arstechnica.com/',
#   'http://www.reuters.com/',
#   'http://abcnews.go.com/',
#   'http://www.cnbc.com/',
]


def sitesize(url: str) -> Tuple[str, Union[int, str]]:
    """Determine the size of a website.

    Args:
        url: Address to download.

    Returns:
        ``(url, size)`` where ``size`` is the number of bytes read from the
        response, or ``(url, message)`` carrying the error text when the
        request raises ``urllib.error.URLError``.
    """
    try:
        with urllib.request.urlopen(url) as f:
            page = f.read()
    except urllib.error.URLError as e:
        # HTTPError is a subclass of URLError, so one handler covers both.
        return url, str(e)
    return url, len(page)


def run() -> None:
    """Measure every entry of ``sites`` on a thread pool and print each result.

    Results are printed as they are yielded by ``imap_unordered``, which is
    not necessarily the order of ``sites``.
    """
    # Network bound: Use max(); CPU bound: Use min()
    # os.cpu_count() may return None; fall back to a single worker in that case.
    workers = max(len(sites), os.cpu_count() or 1)
    # The context manager shuts the pool down on exit so its worker threads
    # are not left behind once every result has been consumed.
    with ThreadPool(workers) as pool:
        for result in pool.imap_unordered(sitesize, sites):
            print(result)


if __name__ == '__main__':
    run()