summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--scripts/fill.image_urls.py19
1 files changed, 18 insertions, 1 deletions
diff --git a/scripts/fill.image_urls.py b/scripts/fill.image_urls.py
index 952b7ea..535d6f8 100644
--- a/scripts/fill.image_urls.py
+++ b/scripts/fill.image_urls.py
@@ -1,5 +1,6 @@
# this needs python 3
+import re
import sys
import postgresql
from urllib.parse import urlparse
@@ -49,6 +50,22 @@ def get_urls_from_messages(messages):
urls.extend(get_images_from_messages(message[0]))
return urls
+# this does 3 things...
+# convert 'http://dumpfm.s3.amazonaws.com' to 'http://dump.fm'
+# drops 'http://' from urls
+# drops 'dump.fm/images' from urls
+# the client is expected to rebuild urls based on this heuristic:
+# if the url starts with '/', prepend 'http://dump.fm/images' (the prefix dropped above)
+# otherwise, prepend 'http://'
+def make_url_smaller(url):  # returns the shortened form of url that is stored in the db
+    if url.startswith('http://dumpfm.s3.amazonaws.com/images'):  # S3 mirror -> canonical dump.fm host
+        url = 'http://dump.fm/images' + url[37:]  # 37 == len of the S3 prefix tested above
+    if url.startswith('http://dump.fm/images'):
+        url = url[21:]  # fixed: was `url = [21:]` (syntax error); keeps only the path after '/images'
+    elif url.startswith('http://'):  # strip the scheme only when present, so other urls are not mangled
+        url = url[7:]
+    return url
+
if __name__ == "__main__":
if not len(sys.argv) == 3:
print('usage: fill.image_urls.py message_id_start message_id_end')
@@ -61,7 +78,7 @@ if __name__ == "__main__":
messages = get_messages(lower, upper)
urls = get_urls_from_messages(messages)
for url in urls:
- add_url(url)
+ add_url(make_url_smaller(url))
print("added ", len(urls), " images to db")
num_new_images = get_num_images_in_db() - num_existing_images
percent_new_images = num_new_images / len(urls) * 100