#!python
"""Download page tiles from the bl.uk manuscripts proxy and stitch them.

For every page of the manuscript named by ``TAG`` the script fetches the
page's XML descriptor, downloads the JPEG tiles for zoom level ``ZOOM`` into
``./<TAG>/<ZOOM>/<page>/`` and pastes them onto a single canvas saved as
``./<TAG>/<ZOOM>/<page>_<ZOOM>.jpg``.  Pages whose output file already exists
and tiles that are already on disk are skipped, so the script can be re-run
to resume an interrupted download.
"""
from math import floor
from multiprocessing import Pool
import os
import time

from PIL import Image
import requests

TAG = 'arundel_ms_263'
LAST_PAGE = 283

ZOOM = 11
TILE_W = 4
TILE_H = 3
# Alternative presets.  NOTE: grab() derives its own tile counts from the
# page's XML descriptor and does not read the module-level TILE_W / TILE_H.
# ZOOM = 13
# TILE_W = 16
# TILE_H = 12
# ZOOM = 14
# TILE_W = 33
# TILE_H = 24

# Stride, in pixels, between neighbouring tiles on the stitched canvas.
TILE_SIZE = 256

# Seconds to wait for the server before giving up on a request; without a
# timeout a single stalled connection would block a worker forever.
REQUEST_TIMEOUT = 30

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}


def fetch_file(url, fn, **kwargs):
    """Download ``url`` into the file ``fn`` unless ``fn`` already exists.

    Extra keyword arguments are sent as query-string parameters.  Request
    failures and non-200 responses are ignored (best effort); the function
    always returns ``None``.  Errors while writing the file propagate to the
    caller.
    """
    # Check before hitting the network so existing tiles cost nothing.
    if os.path.exists(fn):
        return None

    try:
        # NOTE(review): verify=False disables TLS certificate checks; kept
        # from the original script.
        resp = requests.get(url, params=kwargs, headers=headers,
                            verify=False, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None

    content = resp.content
    # Write to a temporary name and move it into place afterwards, so an
    # interrupted write never leaves a truncated tile that a later run would
    # mistake for a finished download.
    tmp_fn = fn + ".part"
    try:
        with open(tmp_fn, 'wb') as f:
            f.write(content)
        os.replace(tmp_fn, fn)
    finally:
        # After a successful os.replace the temporary file is gone; this
        # only cleans up after a failed write.
        if os.path.exists(tmp_fn):
            os.remove(tmp_fn)

    print("{} kb. {}".format(round(len(content) / 1024), fn))
    return None


def fetch_raw(url, **kwargs):
    """Return the body of ``url`` as text, or ``None`` on any failure.

    Extra keyword arguments are sent as query-string parameters.  A request
    error or a status code other than 200 yields ``None``.
    """
    try:
        resp = requests.get(url, params=kwargs, headers=headers,
                            verify=False, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if resp.status_code != 200:
        return None
    return resp.text


# Run this with a pool of 5 agents having a chunksize of 3 until finished
def parallel_fetch(dataset):
    """Download every ``(url, fn)`` pair in ``dataset`` with a process pool."""
    print("Fetching {} tiles".format(len(dataset)))
    agents = 5
    chunksize = 3
    with Pool(processes=agents) as pool:
        pool.starmap(fetch_file, dataset, chunksize)


def load_image(fn):
    """Open the image file ``fn``.

    Returns ``(image, width, height)``, or ``(None, 0, 0)`` when the file is
    missing or cannot be opened as an image.  The caller owns the returned
    image and should close it.
    """
    try:
        image = Image.open(fn)
    except (OSError, ValueError):
        # Covers a missing file and data Pillow cannot identify.
        return None, 0, 0
    width, height = image.size
    return image, width, height


def _tile_fn(page, i, j):
    """Return the local path of tile column ``i``, row ``j`` of ``page``."""
    return "./{}/{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, i, j)


def _tile_size(page, i, j):
    """Return ``(found, width, height)`` for a tile, closing the file again."""
    image, width, height = load_image(_tile_fn(page, i, j))
    if image is None:
        return False, 0, 0
    image.close()
    return True, width, height


def _parse_dimension(xml, attr):
    """Return the integer after the first ``<attr>="`` in ``xml``, else None."""
    try:
        return int(xml.split('{}="'.format(attr))[1].split('"')[0])
    except (IndexError, ValueError):
        return None


# Fetch all the tiles from a tile server and then compose them into a single image
def grab(s, n):
    """Download and stitch one page.

    ``n`` is the page number and ``s`` the suffix appended to it (the driver
    below passes ``'v'`` and ``'r'``).  Nothing is done when the stitched
    output file already exists.  A page whose XML descriptor cannot be
    fetched or parsed is reported and skipped.
    """
    page = "{:03d}{}".format(n, s)
    out_fn = "./{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, ZOOM)
    if os.path.exists(out_fn):
        return

    print("Fetching {}...".format(page))
    xml_url = "http://www.bl.uk/manuscripts/Proxy.ashx?view={}_f{}.xml".format(TAG, page)
    xml = fetch_raw(xml_url)
    if xml is None:
        print("error with page {}".format(page))
        return

    max_width = _parse_dimension(xml, 'Width')
    max_height = _parse_dimension(xml, 'Height')
    if max_width is None or max_height is None:
        # The response was not the descriptor we expected; skip this page
        # instead of aborting the whole run.
        print("error with page {}".format(page))
        return

    # Only create the tile directory once the page is known to be usable.
    os.makedirs("./{}/{}/{}".format(TAG, ZOOM, page), exist_ok=True)

    # Estimate of the highest tile index in each direction.
    # NOTE(review): heuristic carried over unchanged from the original.
    tiles_w = round((max_width / ZOOM) / 256) + 1
    tiles_h = round((max_height / ZOOM) / 256) + 1
    if tiles_w < tiles_h:
        tiles_h += 1
    print("{}x{}".format(tiles_w, tiles_h))

    # Fetch all the images we don't have
    dataset = []
    for i in range(tiles_w + 1):
        for j in range(tiles_h + 1):
            fn = _tile_fn(page, i, j)
            if os.path.exists(fn):
                continue
            url = "http://www.bl.uk/manuscripts/Proxy.ashx?view={}_f{}_files/{}/{}_{}.jpg".format(TAG, page, ZOOM, i, j)
            dataset.append((url, fn))
    if dataset:
        parallel_fetch(dataset)

    # Get the dimensions of the final image (interior plus corners)
    ww = TILE_SIZE * (tiles_w - 1)
    hh = TILE_SIZE * (tiles_h - 1)

    _, width, height = _tile_size(page, 0, 0)
    ww += width
    hh += height

    found, width, height = _tile_size(page, tiles_w, tiles_h)
    ww += width
    hh += height
    if not found:
        # The far-corner tile is missing: take the extra width from the tile
        # one column to its left and the extra height from the one a row up.
        _, width, _ = _tile_size(page, tiles_w - 1, tiles_h)
        ww += width
        _, _, height = _tile_size(page, tiles_w, tiles_h - 1)
        hh += height

    # Build the new canvas by pasting the tiles across it
    canvas = Image.new('RGB', (ww, hh))
    for i in range(tiles_w + 1):
        for j in range(tiles_h + 1):
            image, _, _ = load_image(_tile_fn(page, i, j))
            if image is None:
                continue
            # Close each tile after pasting so file handles do not pile up.
            with image:
                canvas.paste(image, (i * TILE_SIZE, j * TILE_SIZE))
    canvas.save(out_fn)


if __name__ == '__main__':
    os.makedirs("{}/{}".format(TAG, ZOOM), exist_ok=True)
    for n in range(1, LAST_PAGE + 1):
        grab('v', n)
        grab('r', n)