#!python
"""Download image tiles from framemark.vam.ac.uk and stitch one JPEG per page.

For every page number in FIRST_PAGE..LAST_PAGE the script reads the page's
``info.json`` for the image dimensions, downloads the image as 256 px square
tiles into ``./TAG/ZOOM/<page>/`` and pastes them onto a single canvas that is
saved as ``./TAG/ZOOM/<page>_<ZOOM>.jpg``.

Recovered from the patch "scrape vam" (commit 353b7de, Jules Laplace,
2018-11-26), which created this file as ``scrape-vam.py``.
"""

from multiprocessing import Pool
from math import floor
import os
import requests
import time
from PIL import Image
from browser import *

TAG = '2006AW'
FIRST_PAGE = 1773
LAST_PAGE = 1879

# NOTE(review): ZOOM only namespaces the output paths. The tile request always
# asks for a 256 px region scaled to 256 px wide, so values other than 1 do not
# change what is downloaded -- confirm before relying on it as a scale factor.
ZOOM = 1

# Edge length (pixels) of the square regions requested from the server, and
# the stride used when pasting them onto the canvas.
TILE_SIZE = 256

# NOTE(review): the double slash before "collections" is kept exactly as the
# original request had it; the tile URL uses a single slash -- confirm which
# form the server expects.
INFO_URL = "https://framemark.vam.ac.uk//collections/{tag}{page}/info.json"

# Example: https://framemark.vam.ac.uk/collections/2006AW1773/768,256,256,256/256,/0/default.jpg
TILE_URL = (
    "https://framemark.vam.ac.uk/collections/{tag}{page}"
    "/{x},{y},{size},{size}/{size},/0/default.jpg"
)


def _tile_path(page, col, row):
    """Return the local cache path of the tile at grid position (col, row)."""
    return "./{}/{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, col, row)


def _tile_grid(width, height):
    """Yield (col, row, x, y) for each tile whose origin lies inside the image.

    Tiles are produced column by column, matching the order in which they are
    pasted. A tile starting at or beyond ``width``/``height`` would land
    entirely off the canvas, so it is never generated.
    """
    for col, x in enumerate(range(0, width, TILE_SIZE)):
        for row, y in enumerate(range(0, height, TILE_SIZE)):
            yield col, row, x, y


def grab(page):
    """Fetch all tiles of one page and compose them into a single image.

    Does nothing if the stitched output file already exists. Tiles already
    present on disk are reused; only the missing ones are handed to
    ``parallel_fetch`` as ``(url, filename)`` pairs. Tiles that cannot be
    loaded afterwards are skipped, leaving that area of the canvas blank.

    Args:
        page: Page number appended to TAG to form the image identifier.

    Returns:
        None. Prints an error and returns early when ``fetch_json`` yields
        None for the page's ``info.json``.
    """
    out_fn = "./{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, ZOOM)
    if os.path.exists(out_fn):
        return

    print("Fetching {}...".format(page))
    os.makedirs("./{}/{}/{}".format(TAG, ZOOM, page), exist_ok=True)

    data = fetch_json(INFO_URL.format(tag=TAG, page=page))
    if data is None:
        print("error with page {}".format(page))
        return
    max_width = int(data['width'])
    max_height = int(data['height'])

    # The canvas is max_width x max_height and tiles are pasted on a
    # TILE_SIZE stride, so only tiles starting inside those bounds can ever
    # be visible; requesting more just produces failed downloads.
    tiles = list(_tile_grid(max_width, max_height))
    print("{}x{}".format(
        len(range(0, max_width, TILE_SIZE)),
        len(range(0, max_height, TILE_SIZE)),
    ))

    # Fetch all the tiles we don't have yet
    dataset = []
    for col, row, x, y in tiles:
        fn = _tile_path(page, col, row)
        if not os.path.exists(fn):
            url = TILE_URL.format(tag=TAG, page=page, x=x, y=y, size=TILE_SIZE)
            dataset.append((url, fn))
    if dataset:
        parallel_fetch(dataset)

    # Build the new canvas by pasting the tiles across it
    canvas = Image.new('RGB', (max_width, max_height))
    for col, row, x, y in tiles:
        image, _width, _height = load_image(_tile_path(page, col, row))
        if image:
            canvas.paste(image, (x, y))
    canvas.save(out_fn)


if __name__ == '__main__':
    os.makedirs("{}/{}".format(TAG, ZOOM), exist_ok=True)
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        grab(page)