from multiprocessing import Pool
import os
import requests
import time
from PIL import Image

# Browser-like User-Agent so servers that block the default requests client still respond.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}


def fetch_file(url, fn, **kwargs):
    """Download url to the local path fn, skipping files that already exist."""
    # Skip the request entirely if the file is already on disk.
    if os.path.exists(fn):
        return None
    try:
        # stream=True so the body is read in chunks instead of loaded into memory at once.
        # verify=False disables TLS certificate verification.
        resp = requests.get(url, params=kwargs, headers=headers, verify=False, stream=True)
        if resp.status_code != 200:
            return None
    except requests.RequestException:
        return None
    size = 0
    with open(fn, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                size += len(chunk)
                f.write(chunk)
    print("{} kb. {}".format(round(size / 1024), fn))
    return None


def fetch_raw(url, **kwargs):
    """Return the response body as text, or None on any error."""
    try:
        resp = requests.get(url, params=kwargs, headers=headers, verify=False)
        if resp.status_code != 200:
            return None
    except requests.RequestException:
        return None
    return resp.text


def fetch_json(url, **kwargs):
    """Return the response body parsed as JSON, or None on any error."""
    try:
        resp = requests.get(url, params=kwargs, headers=headers, verify=False)
        if resp.status_code != 200:
            return None
    except requests.RequestException:
        return None
    return resp.json()


# Run fetch_file over the dataset with a pool of 5 workers and a chunksize of 3 until finished.
def parallel_fetch(dataset):
    print("Fetching {} tiles".format(len(dataset)))
    agents = 5
    chunksize = 3
    with Pool(processes=agents) as pool:
        # Each dataset entry is an (url, fn) tuple, unpacked as fetch_file's positional arguments.
        pool.starmap(fetch_file, dataset, chunksize)


def load_image(fn):
    """Open an image with Pillow and return (image, width, height), or (None, 0, 0) on failure."""
    try:
        image = Image.open(fn)
        width, height = image.size
        return image, width, height
    except (OSError, ValueError):
        return None, 0, 0
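
# Example usage, a minimal sketch: the tile URLs and output paths below are
# hypothetical placeholders, not part of the original script. It assumes each
# dataset entry is an (url, fn) tuple matching fetch_file's signature. The
# __main__ guard is required so multiprocessing can safely re-import this module.
if __name__ == '__main__':
    dataset = [
        ('https://example.com/tiles/0/0.png', 'tiles/0_0.png'),
        ('https://example.com/tiles/0/1.png', 'tiles/0_1.png'),
    ]
    os.makedirs('tiles', exist_ok=True)
    parallel_fetch(dataset)
    image, width, height = load_image('tiles/0_0.png')
    if image is not None:
        print("Loaded {}x{} image".format(width, height))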