summary refs log tree commit diff
path: root/scrape-vam.py
diff options
context:
space:
mode:
Diffstat (limited to 'scrape-vam.py')
-rw-r--r-- scrape-vam.py 75
1 files changed, 75 insertions, 0 deletions
diff --git a/scrape-vam.py b/scrape-vam.py
new file mode 100644
index 0000000..8ded1f6
--- /dev/null
+++ b/scrape-vam.py
@@ -0,0 +1,75 @@
+#!python
+
+from multiprocessing import Pool
+from math import floor
+import os
+import requests
+import time
+from PIL import Image
+from browser import *
+
+TAG = '2006AW'  # collection prefix; top-level output directory name (the URLs in grab() carry the same prefix)
+FIRST_PAGE = 1773  # first page number to fetch (inclusive)
+LAST_PAGE = 1879  # last page number to fetch (inclusive)
+
+ZOOM = 1  # zoom label; part of every output path built by grab()
+
+# Fetch all the tiles from a tile server and then compose them into a single image
+def grab(page):
+ out_fn = "./{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, ZOOM)
+ if os.path.exists(out_fn):
+ return
+
+ print("Fetching {}...".format(page))
+ os.makedirs("./{}/{}/{}".format(TAG, ZOOM, page), exist_ok=True)
+
+ json_url = "https://framemark.vam.ac.uk//collections/2006AW{}/info.json".format(page)
+ data = fetch_json(json_url)
+ if data is None:
+ print("error with page {}".format(page))
+ return
+ max_width = data['width']
+ max_height = data['height']
+
+ TILE_W = round((max_width / ZOOM) / 256) + 1
+ TILE_H = round((max_height / ZOOM) / 256) + 1
+ if TILE_W < TILE_H:
+ TILE_H += 1
+ print("{}x{}".format(TILE_W, TILE_H))
+
+ dataset = []
+ for i in range(0, TILE_W + 1):
+ for j in range(0, TILE_H + 1):
+ # https://framemark.vam.ac.uk/collections/2006AW1773/768,256,256,256/256,/0/default.jpg
+ url = "https://framemark.vam.ac.uk/collections/2006AW{}/{},{},256,256/256,/0/default.jpg".format(page, i * 256, j * 256)
+ fn = "./{}/{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, i, j)
+ if not os.path.exists(fn):
+ dataset.append((url, fn,))
+
+ # Fetch all the images we don't have
+ if len(dataset):
+ parallel_fetch(dataset)
+
+ # Get the dimensions of the final image (interior plus corners)
+ ww = max_width
+ hh = max_height
+
+ # Build the new canvas by pasting the tiles across it
+ canvas = Image.new('RGB', (ww, hh,))
+ x = 0
+ for i in range(0, TILE_W + 1):
+ y = 0
+ for j in range(0, TILE_H + 1):
+ fn = "./{}/{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, i, j)
+ image, width, height = load_image(fn)
+ if image:
+ canvas.paste(image, (x, y))
+ y += 256
+ x += 256
+ canvas.save(out_fn)
+
+if __name__ == '__main__':
+ os.makedirs("{}/{}".format(TAG, ZOOM), exist_ok=True)
+ for page in range(FIRST_PAGE, LAST_PAGE + 1):
+ grab(page)
+