David Blume's GitList
Repositories
testpython.git
Code
Commits
Branches
Tags
Search
Tree:
bdee4ce
Branches
Tags
main
python2
testpython.git
sitesize
sitesize.py
Add a tip about setting the number of threads in ThreadPool.
David Blume
committed
bdee4ce
at 2020-12-30 12:02:16
sitesize.py
Blame
History
Raw
#!/usr/bin/env python3
# From https://pybay.com/site_media/slides/raymond2017-keynote/process.html
"""Fetch a list of websites concurrently and print the size of each page."""
import os
import urllib.error
import urllib.request
from multiprocessing.pool import ThreadPool
from typing import Tuple, Union

# URLs to measure. Uncomment entries to include them in the run.
sites = [
    # 'https://www.yahoo.com/',
    # 'http://www.cnn.com',
    # 'http://www.python.org',
    # 'http://www.jython.org',
    'http://www.pypy.org',
    'http://www.perl.org',
    # 'http://www.cisco.com',
    # 'http://www.facebook.com',
    # 'http://www.twitter.com',
    # 'http://www.macrumors.com/',
    # 'http://arstechnica.com/',
    # 'http://www.reuters.com/',
    # 'http://abcnews.go.com/',
    # 'http://www.cnbc.com/',
]


def sitesize(url: str) -> Tuple[str, Union[int, str]]:
    """Determine the size of a website.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(url, size)`` tuple where ``size`` is the number of bytes read
        from the response, or ``(url, message)`` where ``message`` is a
        string describing the error when the request fails.
    """
    try:
        with urllib.request.urlopen(url) as f:
            page = f.read()
            return url, len(page)
    # HTTPError is a subclass of URLError, so it has to be handled first.
    except urllib.error.HTTPError as e:
        return url, str(e)
    except urllib.error.URLError as e:
        return url, "On macOS? Try another python3. " + str(e)


def run():
    """Fetch every URL in ``sites`` on a thread pool and print each result.

    Results are printed in completion order, not in the order of ``sites``.
    """
    # Network bound: Use max(); CPU bound: Use min()
    # os.cpu_count() may return None when the count is undetermined; fall
    # back to 1 so max() never compares an int with None and the pool
    # always has at least one worker (ThreadPool rejects a size of 0).
    workers = max(len(sites), os.cpu_count() or 1)
    # The context manager tears the pool down once all results are consumed,
    # so worker threads are not left behind.
    with ThreadPool(workers) as pool:
        for result in pool.imap_unordered(sitesize, sites):
            print(result)


if __name__ == '__main__':
    run()