1 files changed, 26 insertions, 10 deletions
diff --git a/scrape-codex.py b/scrape-codex.py
index 6a7f9b9..c452a62 100644
--- a/scrape-codex.py
+++ b/scrape-codex.py
@@ -43,6 +43,15 @@ def fetch_file(url, fn, **kwargs):
   print("{} kb. {}".format(round(size / 1024), fn))
   return None
 
+def fetch_raw(url, fn=None, **kwargs):
+  """GET url (kwargs become query params); return body text, or None on failure."""
+  try:
+    resp = requests.get(url, params=kwargs, headers=headers, verify=False)
+  except Exception:  # still best-effort, but Ctrl-C / SystemExit now propagate
+    return None
+  # fn is unused and optional, so one-argument calls work; non-200 counts as failure.
+  return resp.text if resp.status_code == 200 else None
+
 # Run this with a pool of 5 agents having a chunksize of 3 until finished
 def parallel_fetch(dataset):
   print("Fetching {} tiles".format(len(dataset)))
@@ -68,6 +77,19 @@ def grab(s, n):
 
   print("Fetching {}...".format(page))
   os.makedirs("./{}/{}/{}".format(TAG, ZOOM, page), exist_ok=True)
+
+  xml_url = "http://www.bl.uk/manuscripts/Proxy.ashx?view={}_f{}.xml".format(TAG, page)
+  xml = fetch_raw(xml_url)
+  if xml is None:
+    print("error with page {}".format(page))
+    return
+  max_width = int(xml.split('Width="')[1].split('"')[0])
+  max_height = int(xml.split('Height="')[1].split('"')[0])
+
+  TILE_W = floor(max_width / ZOOM) + 1
+  TILE_H = floor(max_height / ZOOM) + 1
+  print("{}x{}".format(TILE_W, TILE_H))
+
   dataset = []
   for i in range(0, TILE_W + 1):
     for j in range(0, TILE_H + 1):
@@ -81,8 +103,8 @@ def grab(s, n):
     parallel_fetch(dataset)
 
   # Get the dimensions of the final image (interior plus corners)
-  ww = 258 * (TILE_W - 1)
-  hh = 258 * (TILE_H - 1)
+  ww = 256 * (TILE_W - 1)
+  hh = 256 * (TILE_H - 1)
 
   start_w = 0
   end_w = 0
@@ -120,14 +142,8 @@ def grab(s, n):
       image, width, height = load_image(fn)
       if image:
         canvas.paste(image, (x, y))
-        if j == 0:
-          y += start_h
-        else:
-          y += height
-    if i == 0:
-      x += start_w
-    else:
-      x += width
+      y += height
+    x += width
   canvas.save(out_fn)
 
 if __name__ == '__main__':