summaryrefslogtreecommitdiff
path: root/scrape-vam.py
blob: f0a658c47aaac30f4c4be425e2e3b722bcc4acbd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!python

from multiprocessing import Pool
from math import floor
import os
import requests
import time
from PIL import Image
from browser import *

# Collection tag: names the top-level output directory and matches the
# identifier prefix used in the tile-server URLs.
TAG = '2006AW'
# First and last page numbers to process (both inclusive).
FIRST_PAGE = 1773
LAST_PAGE = 1879

# Divisor applied to the image dimensions when counting tiles; it is also part
# of the output directory and file names.
ZOOM = 1

# Fetch all the tiles from a tile server and then compose them into a single image
def grab(page):
  """Download the 256-pixel tiles of one page and stitch them into one JPEG.

  The page's pixel dimensions are read from its ``info.json``. Tiles that are
  not on disk yet are downloaded into ``./<TAG>/<ZOOM>/<page>/`` and the
  assembled image is written to ``./<TAG>/<ZOOM>/<page>_<ZOOM>.jpg``.

  Nothing is done when the output file already exists. When ``fetch_json``
  returns ``None`` an error line is printed and the page is skipped.

  ``fetch_json``, ``parallel_fetch`` and ``load_image`` are not defined in this
  file; presumably they come from the ``browser`` star-import.

  Args:
    page: Page number appended to ``TAG`` to form the image identifier.

  Returns:
    None.
  """
  # Local import: the module header only pulls in ``floor`` from math.
  from math import ceil

  tile_px = 256

  out_fn = "./{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, ZOOM)
  if os.path.exists(out_fn):
    return

  print("Fetching {}...".format(page))
  tile_dir = "./{}/{}/{}".format(TAG, ZOOM, page)
  os.makedirs(tile_dir, exist_ok=True)

  # The doubled slash after the host is kept exactly as originally requested.
  json_url = "https://framemark.vam.ac.uk//collections/{}{}/info.json".format(TAG, page)
  data = fetch_json(json_url)
  if data is None:
    print("error with page {}".format(page))
    return
  max_width = data['width']
  max_height = data['height']

  # Round UP: a partial tile at the right/bottom edge still needs its own
  # column/row. (round() dropped it whenever the remainder was under half a
  # tile, and then squashed the last full tile to the remainder size.)
  # NOTE(review): ZOOM only influences the tile count and the paths; the tile
  # URLs below always request full-resolution regions.
  tiles_w = ceil((max_width / ZOOM) / tile_px)
  tiles_h = ceil((max_height / ZOOM) / tile_px)
  print("{}x{}".format(tiles_w, tiles_h))

  # Collect the tiles that are not on disk yet. Only tiles that intersect the
  # image are requested, so no out-of-bounds regions are sent to the server.
  dataset = []
  for i in range(tiles_w):
    for j in range(tiles_h):
      fn = "{}/{}_{}.jpg".format(tile_dir, i, j)
      if os.path.exists(fn):
        continue
      # https://framemark.vam.ac.uk/collections/2006AW1773/768,256,256,256/256,/0/default.jpg
      url = "https://framemark.vam.ac.uk/collections/{}{}/{},{},256,256/256,/0/default.jpg".format(
        TAG, page, i * tile_px, j * tile_px)
      dataset.append((url, fn))

  # Fetch all the images we don't have
  if dataset:
    parallel_fetch(dataset)

  # Size of the partial tile on the right/bottom edge. A remainder of 0 means
  # the edge tile is a full one, not an empty one.
  edge_w = max_width % tile_px or tile_px
  edge_h = max_height % tile_px or tile_px

  # Build the new canvas by pasting the tiles across it
  canvas = Image.new('RGB', (max_width, max_height))
  last_pasted = None
  for i in range(tiles_w):
    for j in range(tiles_h):
      fn = "{}/{}_{}.jpg".format(tile_dir, i, j)
      image, width, height = load_image(fn)
      if not image:
        # Presumably the tile could not be loaded; its area stays blank.
        continue
      if i == tiles_w - 1:
        # One pixel of slack on the width, as in the original code --
        # presumably to absorb rounding in thumbnail()'s aspect-ratio maths.
        width = edge_w + 1
      else:
        width = min(width, tile_px)
      if j == tiles_h - 1:
        height = edge_h
      else:
        height = min(height, tile_px)
      # The URL asks for 256-wide output ("256,"), so a clipped edge region may
      # come back scaled up (TODO confirm); thumbnail() only ever shrinks, so
      # full tiles pass through unchanged. Image.ANTIALIAS was removed in
      # Pillow 10; LANCZOS is the same filter under its current name.
      image.thumbnail((width, height), Image.LANCZOS)
      canvas.paste(image, (i * tile_px, j * tile_px))
      last_pasted = (width, height, image.size)

  # Report the last tile actually pasted; skipped when no tile was available,
  # which previously raised NameError/AttributeError here.
  if last_pasted is not None:
    box_w, box_h, size = last_pasted
    print("{}x{} {}x{}".format(box_w, box_h, size[0], size[1]))
  canvas.save(out_fn)

if __name__ == '__main__':
  # Create the "<TAG>/<ZOOM>" output directory up front, then process every
  # page from FIRST_PAGE through LAST_PAGE (inclusive), one after another.
  output_root = "{}/{}".format(TAG, ZOOM)
  os.makedirs(output_root, exist_ok=True)
  page_numbers = range(FIRST_PAGE, LAST_PAGE + 1)
  for page_number in page_numbers:
    grab(page_number)