1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
#!python
from multiprocessing import Pool
import os
import requests
import time
from PIL import Image
# Manuscript identifier; used in the tile URLs and as the top-level output directory.
TAG = 'arundel_ms_263'

# Highest folio number processed by the main loop (folios are numbered from 1).
LAST_PAGE = 283

# Zoom level requested from the tile server, together with the highest tile
# column index (TILE_W) and row index (TILE_H) fetched at that level.
# Tile indices start at 0, so (TILE_W + 1) x (TILE_H + 1) tiles are requested.
ZOOM = 11
TILE_W = 4
TILE_H = 3

# Alternative presets for higher zoom levels; swap one in for the three
# values above to use it.
# ZOOM = 13
# TILE_W = 16
# TILE_H = 12
# ZOOM = 14
# TILE_W = 33
# TILE_H = 24

# HTTP headers sent with every tile request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
}
def fetch_file(url, fn, **kwargs):
    """Download ``url`` to the local path ``fn`` unless ``fn`` already exists.

    Args:
        url: Address of the resource to download.
        fn: Destination path on disk. An existing file is never overwritten.
        **kwargs: Sent as query-string parameters with the request.

    Returns:
        Always None. Network errors and non-200 responses are skipped
        silently, so one unavailable tile does not abort a whole batch.
    """
    # Check the disk before touching the network: an existing file means
    # there is nothing to do, so no request should be spent on it.
    if os.path.exists(fn):
        return None

    try:
        # NOTE(review): verify=False turns off TLS certificate verification.
        # It is kept as-is for compatibility; confirm it is still needed.
        # The timeout stops a stalled connection from hanging a worker forever.
        resp = requests.get(url, params=kwargs, headers=headers, verify=False, timeout=30)
    except requests.RequestException:
        # Best-effort download; KeyboardInterrupt/SystemExit still propagate.
        return None
    if resp.status_code != 200:
        return None

    size = 0
    with open(fn, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                size += len(chunk)
                f.write(chunk)
    print("{} kb. {}".format(round(size / 1024), fn))
    return None
# Download a batch of tiles concurrently using a pool of worker processes.
def parallel_fetch(dataset):
    """Run ``fetch_file`` over every argument tuple in ``dataset``.

    Args:
        dataset: Sequence of argument tuples for ``fetch_file``; each tuple is
            unpacked into one call. Its length is printed before work starts.

    The call blocks until all entries have been processed by a pool of
    5 worker processes, with work handed out in batches of 3.
    """
    worker_count = 5
    batch_size = 3
    print("Fetching {} tiles".format(len(dataset)))
    with Pool(processes=worker_count) as workers:
        workers.starmap(fetch_file, dataset, chunksize=batch_size)
def load_image(fn):
    """Open the image stored at ``fn`` and report its size.

    Args:
        fn: Path of the image file to open.

    Returns:
        A tuple ``(image, width, height)`` on success, or ``(None, 0, 0)``
        when the file cannot be opened (for example because it is missing).
    """
    try:
        image = Image.open(fn)
    except Exception:
        # Best-effort by design: callers treat an unreadable tile as absent.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # propagate instead of being mistaken for a missing tile.
        return None, 0, 0
    width, height = image.size
    return image, width, height
# Fetch all the tiles from a tile server and then compose them into a single image
def _tile_fn(page, i, j):
    """Return the local path of the tile at column ``i``, row ``j`` of ``page``."""
    return "./{}/{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, i, j)


def _tile_url(page, i, j):
    """Return the tile-server URL of the tile at column ``i``, row ``j`` of ``page``."""
    return "http://www.bl.uk/manuscripts/Proxy.ashx?view={}_f{}_files/{}/{}_{}.jpg".format(TAG, page, ZOOM, i, j)


def _measure_tiles(page):
    """Return ``(column_widths, row_heights)`` measured from the tiles on disk.

    Each entry is the largest width (per column) or height (per row) among the
    tiles that could be opened; a column or row with no readable tile gets 0.
    """
    col_w = [0] * (TILE_W + 1)
    row_h = [0] * (TILE_H + 1)
    for i in range(TILE_W + 1):
        for j in range(TILE_H + 1):
            image, width, height = load_image(_tile_fn(page, i, j))
            if image is None:
                continue
            # Only the size is needed here; release the file handle right away
            # so large grids do not accumulate open files.
            image.close()
            col_w[i] = max(col_w[i], width)
            row_h[j] = max(row_h[j], height)
    return col_w, row_h


def grab(s, n):
    """Download the tiles of one page and stitch them into a single JPEG.

    Args:
        s: Suffix appended to the page number (the main loop passes 'v' and 'r').
        n: Page number; formatted as a zero-padded three-digit value.

    Returns:
        None. The stitched image is written to ``./TAG/ZOOM/<page>_ZOOM.jpg``
        and the individual tiles are kept under ``./TAG/ZOOM/<page>/``.
        Nothing is done when the stitched image already exists, and nothing
        is saved when no tile of the page could be read.
    """
    page = "{:03d}{}".format(n, s)
    out_fn = "./{}/{}/{}_{}.jpg".format(TAG, ZOOM, page, ZOOM)
    if os.path.exists(out_fn):
        return
    print("Fetching {}...".format(page))
    os.makedirs("./{}/{}/{}".format(TAG, ZOOM, page), exist_ok=True)

    # Fetch all the tiles we don't have yet
    dataset = []
    for i in range(TILE_W + 1):
        for j in range(TILE_H + 1):
            fn = _tile_fn(page, i, j)
            if not os.path.exists(fn):
                dataset.append((_tile_url(page, i, j), fn))
    if dataset:
        parallel_fetch(dataset)

    # Size the canvas from the tiles themselves rather than assuming a fixed
    # interior tile size, so pages with fewer rows or columns than
    # TILE_W/TILE_H still get an exactly fitting canvas.
    col_w, row_h = _measure_tiles(page)
    ww = sum(col_w)
    hh = sum(row_h)
    if not ww or not hh:
        # Saving an empty canvas would make the existing-output check above
        # skip this page on every later run, so leave it for a retry instead.
        print("No tiles available for {}; skipping".format(page))
        return

    # Build the new canvas by pasting the tiles across it
    canvas = Image.new('RGB', (ww, hh))
    x = 0
    for i in range(TILE_W + 1):
        y = 0
        for j in range(TILE_H + 1):
            image, _, _ = load_image(_tile_fn(page, i, j))
            if image is not None:
                canvas.paste(image, (x, y))
                image.close()
            # Advance by the measured row height even when this tile is
            # missing, so the tiles below keep their correct position.
            y += row_h[j]
        x += col_w[i]
    canvas.save(out_fn)
if __name__ == '__main__':
    # Make sure the output directory for this manuscript and zoom level exists.
    os.makedirs("{}/{}".format(TAG, ZOOM), exist_ok=True)
    # Process every folio from 1 to LAST_PAGE: the 'v' side first, then 'r'.
    for folio in range(1, LAST_PAGE + 1):
        for side in ('v', 'r'):
            grab(side, folio)
|