#!python
"""Download the tiles of each manuscript page and stitch them into one image.

For every page of the manuscript identified by ``TAG`` the script fetches a
grid of JPEG tiles from the www.bl.uk manuscripts proxy, caches them on disk
under ``./<TAG>/<ZOOM>/<page>/`` and pastes them onto a single canvas saved as
``./<TAG>/<ZOOM>/<page>_<ZOOM>.jpg``.
"""
from multiprocessing import Pool
import os
import requests
import time

from PIL import Image

TAG = 'arundel_ms_263'
LAST_PAGE = 283

# Zoom level requested from the server and the highest tile index along each
# axis at that level (tile indices run from 0 to TILE_W / TILE_H inclusive).
ZOOM = 11
TILE_W = 4
TILE_H = 3

# Alternative presets for other zoom levels:
# ZOOM = 13
# TILE_W = 16
# TILE_H = 12

# ZOOM = 14
# TILE_W = 33
# TILE_H = 24

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}


def fetch_file(url, fn, **kwargs):
    """Download ``url`` to the local path ``fn`` unless ``fn`` already exists.

    Args:
        url: Address of the resource to download.
        fn: Destination path on disk.
        **kwargs: Sent to the server as query-string parameters.

    Returns:
        Always ``None``. Request failures and non-200 responses are skipped
        silently so that one bad tile does not abort a whole batch.
    """
    # Check the cache before touching the network; a file that is already on
    # disk is never overwritten.
    if os.path.exists(fn):
        return None

    try:
        # NOTE(review): verify=False disables TLS certificate checks; it only
        # matters if an https URL is ever passed in.
        # The timeout keeps a stalled connection from blocking a pool worker
        # forever; a timeout is reported as a RequestException.
        resp = requests.get(url, params=kwargs, headers=headers,
                            verify=False, timeout=30)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None

    size = 0
    with open(fn, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                size += len(chunk)
                f.write(chunk)
    print("{} kb. {}".format(round(size / 1024), fn))
    return None


# Run this with a pool of 5 agents having a chunksize of 3 until finished
def parallel_fetch(dataset):
    """Fetch every ``(url, fn)`` pair in ``dataset`` using a process pool.

    Args:
        dataset: Sized sequence of argument tuples, each unpacked into a
            ``fetch_file`` call.
    """
    print("Fetching {} tiles".format(len(dataset)))
    agents = 5
    chunksize = 3
    with Pool(processes=agents) as pool:
        pool.starmap(fetch_file, dataset, chunksize)


def load_image(fn):
    """Open the image at ``fn`` and report its dimensions.

    Args:
        fn: Path of the image file.

    Returns:
        ``(image, width, height)`` on success, or ``(None, 0, 0)`` when the
        file is missing or cannot be opened as an image.
    """
    try:
        image = Image.open(fn)
        width, height = image.size
        return image, width, height
    except (OSError, ValueError):
        # Missing or unreadable tiles are treated as empty so that the caller
        # can keep going with whatever tiles are available.
        return None, 0, 0


def _tile_path(page, i, j):
    """Return the on-disk cache path of tile (``i``, ``j``) of ``page``."""
    return "./{}/{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, i, j)


# Fetch all the tiles from a tile server and then compose them into a single image
def grab(s, n):
    """Download and stitch one page, skipping it if the output already exists.

    Args:
        s: Suffix appended to the zero-padded page number (the script passes
            'v' and 'r').
        n: Page number.
    """
    # The page identifier has to be built first: the output file name below
    # depends on it.
    page = "{:03d}{}".format(n, s)
    out_fn = "./{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, ZOOM)
    if os.path.exists(out_fn):
        return

    print("Fetching {}...".format(page))
    os.makedirs("./{}/{}/{}".format(TAG, ZOOM, page), exist_ok=True)

    # Collect the tiles that are not cached yet.
    dataset = []
    for i in range(0, TILE_W + 1):
        for j in range(0, TILE_H + 1):
            url = "http://www.bl.uk/manuscripts/Proxy.ashx?view={}_f{}_files/{}/{}_{}.jpg".format(TAG, page, ZOOM, i, j)
            fn = _tile_path(page, i, j)
            if not os.path.exists(fn):
                dataset.append((url, fn,))

    # Fetch all the images we don't have
    if dataset:
        parallel_fetch(dataset)

    # Get the dimensions of the final image (interior plus corners).
    # NOTE(review): 258 is presumably the pixel size of an interior tile;
    # only the first and last tiles are actually measured - confirm.
    _, first_w, first_h = load_image(_tile_path(page, 0, 0))
    _, last_w, last_h = load_image(_tile_path(page, TILE_W, TILE_H))
    ww = 258 * (TILE_W - 1) + first_w + last_w
    hh = 258 * (TILE_H - 1) + first_h + last_h

    # Build the new canvas by pasting the tiles across it, column by column.
    # NOTE(review): a page with missing tiles is still saved and will not be
    # retried on the next run because the output file then exists.
    canvas = Image.new('RGB', (ww, hh,))
    x = 0
    width = last_w
    for i in range(0, TILE_W + 1):
        y = 0
        for j in range(0, TILE_H + 1):
            image, width, height = load_image(_tile_path(page, i, j))
            if image is not None:
                canvas.paste(image, (x, y))
            y += height
        # Advance by the width of the last tile examined in this column.
        x += width
    canvas.save(out_fn)


if __name__ == '__main__':
    os.makedirs("{}/{}".format(TAG, ZOOM), exist_ok=True)
    for n in range(1, LAST_PAGE + 1):
        grab('v', n)
        grab('r', n)