summaryrefslogtreecommitdiff
path: root/megapixels/commands/msc/append_embassies.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/msc/append_embassies.py')
-rw-r--r--megapixels/commands/msc/append_embassies.py126
1 files changed, 126 insertions, 0 deletions
diff --git a/megapixels/commands/msc/append_embassies.py b/megapixels/commands/msc/append_embassies.py
new file mode 100644
index 00000000..2e659344
--- /dev/null
+++ b/megapixels/commands/msc/append_embassies.py
@@ -0,0 +1,126 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+log = logger_utils.Logger.getLogger()
+
def _collect_flickr_api_keys(fallback_key=None, max_keys=9):
    """Return the Flickr API keys available for rotation.

    Scans the environment for FLICKR_API_KEY_<i> / FLICKR_API_SECRET_<i>
    pairs (i = 1..max_keys) and keeps the key of every pair where both
    values are set and non-empty. If no pair is found, ``fallback_key``
    (e.g. a key passed on the command line) is used so the list is never
    empty when a key was explicitly supplied.
    """
    import os

    api_keys = []
    for i in range(1, max_keys + 1):
        key = os.environ.get(f'FLICKR_API_KEY_{i}')
        secret = os.environ.get(f'FLICKR_API_SECRET_{i}')
        if key and secret:
            api_keys.append(key)

    if not api_keys and fallback_key:
        api_keys.append(fallback_key)
    return api_keys


def _build_lookup_url(profile_url, api_key):
    """Build the flickr.urls.lookupUser REST URL for a Flickr profile URL.

    Equivalent to:
    https://api.flickr.com/services/rest/
      ?method=flickr.urls.lookupUser&api_key=xxx&url=[encoded url]&format=json&nojsoncallback=1
    """
    from urllib.parse import urlencode

    # urlencode applies quote_plus to every value, so the profile URL is
    # safely embedded as a query parameter
    query = urlencode({
        'method': 'flickr.urls.lookupUser',
        'api_key': api_key,
        'url': profile_url,
        'format': 'json',
        'nojsoncallback': 1,
    })
    return f'https://api.flickr.com/services/rest/?{query}'


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input CSV file')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output CSV file')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
    help='Slice list of files')
@click.option('--api_key', 'opt_api_key', envvar='FLICKR_API_KEY_1')
@click.option('--api_secret', 'opt_api_secret', envvar='FLICKR_API_SECRET_1')
@click.option('-d', '--delay', 'opt_delay', default=None, type=float,
    help='Delay between API calls to prevent rate-limiting')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_api_key, opt_api_secret,
        opt_delay):
    """Looks up Flickr users for embassy profile URLs. Saves to CSV

    Reads a CSV with a "url" column, resolves every URL that has no "nsid"
    yet via flickr.urls.lookupUser, fills in "nsid" and "username", and
    writes all records to the output CSV.
    """

    import json
    import time
    import urllib.request
    from random import choice

    from tqdm import tqdm

    # -------------------------------------------------
    # process

    if not opt_api_key or not opt_api_secret:
        log.error('source ../env/flickr.env vars for Flickr API and try again')
        return

    # rotate between all configured keys; fall back to the key given on the
    # command line when no FLICKR_API_KEY_<i> pairs exist in the environment
    api_keys = _collect_flickr_api_keys(fallback_key=opt_api_key)
    log.info(f'Shuffling between: {len(api_keys)} api keys')

    # read in CSV
    # | url, nsid, username, ... |
    df_records = pd.read_csv(opt_fp_in)
    log.info(f'Dedpuplicating {len(df_records)}')
    df_records = df_records.drop_duplicates(subset='url', keep="last")
    log.info(f'Dedpuplicated {len(df_records)}')
    records = df_records.to_dict('records')

    if opt_slice:
        # the default (None, None) slice keeps every record
        records = records[opt_slice[0]:opt_slice[1]]

    log.info('Processing: {:,} items'.format(len(records)))

    for record in tqdm(records):
        # skip records that were already resolved; empty CSV cells are read
        # back as NaN, which is truthy, so it has to be tested explicitly
        nsid = record.get('nsid')
        if pd.notna(nsid) and nsid:
            continue

        profile_url = record['url']
        if not isinstance(profile_url, str) or not profile_url:
            log.error(f'Skipping record without url: {record}')
            continue

        # pick a random api key per request to avoid rate limiting
        flickr_url = _build_lookup_url(profile_url, choice(api_keys))
        log.debug(f'{flickr_url}')

        try:
            # expected response:
            # { "user": { "id": "46768316@N07",
            #   "username": { "_content": "U.S. Embassy Tirana Art Contest" } }, "stat": "ok" }
            with urllib.request.urlopen(flickr_url) as response:
                data = json.loads(response.read().decode())

            if data['stat'] == 'fail':
                error_msg = data["message"]
                log.error(f'Failed. Message: {error_msg}, url: {flickr_url}')
            elif data['stat'] == 'ok':
                user_data = data.get('user')
                user_nsid = user_data.get('id')
                username = user_data.get('username').get('_content')
                # assign only after both fields were extracted so a malformed
                # response never leaves a half-filled record
                record['nsid'] = user_nsid
                record['username'] = username
        except Exception as e:
            # best effort: a failed lookup must not abort the whole batch
            log.error(f'Exception: {e}, url: {flickr_url}')

        if opt_delay:
            time.sleep(opt_delay)

    # write data
    # NOTE(review): to_csv also writes the DataFrame index as the first
    # column; confirm downstream readers expect it before changing this
    df_records = pd.DataFrame.from_dict(records)
    df_records.to_csv(opt_fp_out)