diff options
| author | tim b <timb@camcompu.home> | 2010-09-26 21:54:33 -0700 |
|---|---|---|
| committer | tim b <timb@camcompu.home> | 2010-09-26 21:54:33 -0700 |
| commit | d9266105bd0171c7d36a05ce4bb1b2bcc690e114 (patch) | |
| tree | 7dc40420ab131d1ec89c8eeff31b9e220aed8b78 /scripts | |
| parent | 223b83a5ad0eeb909349c0fb1213e1ac2be85d16 (diff) | |
make stored urls smaller. make s3 urls equiv to dump urls
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/fill.image_urls.py | 19 |
1 files changed, 18 insertions, 1 deletions
```diff
diff --git a/scripts/fill.image_urls.py b/scripts/fill.image_urls.py
index 952b7ea..535d6f8 100644
--- a/scripts/fill.image_urls.py
+++ b/scripts/fill.image_urls.py
@@ -1,5 +1,6 @@
 # this needs python 3
 
+import re
 import sys
 import postgresql
 from urllib.parse import urlparse
@@ -49,6 +50,22 @@ def get_urls_from_messages(messages):
         urls.extend(get_images_from_messages(message[0]))
     return urls
 
+# this does 3 things...
+# convert 'http://dumpfm.s3.amazonaws.com' to 'http://dump.fm'
+# drops 'http://' from urls
+# drops 'dump.fm/images' from urls
+# the client is expected to rebuild urls based on this heuristic:
+# if the url starts with '/', prepend 'http://dump.fm'
+# otherwise, prepend 'http://'
+def make_url_smaller(url):
+    if url[:37] == 'http://dumpfm.s3.amazonaws.com/images':
+        url = 'http://dump.fm/images' + url[37:]
+    if url[:21] == 'http://dump.fm/images':
+        url = [21:]
+    else:
+        url = url[7:]
+    return url
+
 if __name__ == "__main__":
     if not len(sys.argv) == 3:
         print('usage: fill.image_urls.py message_id_start message_id_end')
@@ -61,7 +78,7 @@ if __name__ == "__main__":
     messages = get_messages(lower, upper)
     urls = get_urls_from_messages(messages)
     for url in urls:
-        add_url(url)
+        add_url(make_url_smaller(url))
     print("added ", len(urls), " images to db")
     num_new_images = get_num_images_in_db() - num_existing_images
     percent_new_images = num_new_images / len(urls) * 100
```
