1 files changed, 150 insertions, 0 deletions
diff --git a/megapixels/commands/msc/append_embassy_profile.py b/megapixels/commands/msc/append_embassy_profile.py
new file mode 100644
index 00000000..7d301f06
--- /dev/null
+++ b/megapixels/commands/msc/append_embassy_profile.py
@@ -0,0 +1,150 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+log = logger_utils.Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input CSV file with "url" and "nsid" columns')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output file')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+  help='Slice list of files')
+@click.option('--api_key', 'opt_api_key', envvar='FLICKR_API_KEY_1')
+@click.option('--api_secret', 'opt_api_secret', envvar='FLICKR_API_SECRET_1')
+@click.option('-d', '--delay', 'opt_delay', default=None, type=float,
+  help='Delay between API calls to prevent rate-limiting')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_api_key, opt_api_secret,
+  opt_delay):
+  """Appends Flickr profile fields to the records of a CSV. Saves to CSV
+
+  Reads opt_fp_in with pandas, drops duplicate "url" rows, applies opt_slice,
+  then requests flickr.profile.getProfile for every row that has an "nsid".
+  Fields are added with setdefault, so values already in a row are kept; a
+  failed lookup is logged and its row written unchanged. A key is picked at
+  random per call from the FLICKR_API_KEY_{i}/FLICKR_API_SECRET_{i} pairs
+  (i = 1..19) in the environment, else opt_api_key is used. When opt_delay
+  is set, that many seconds are slept after each API call.
+  """
+
+  from tqdm import tqdm
+  import time
+  import json
+  import os
+  from random import choice
+  import urllib.parse
+  import urllib.request
+
+  if not opt_api_key or not opt_api_secret:
+    log.error('source ../env/flickr.env vars for Flickr API and try again')
+    return
+
+  # collect every numbered key/secret pair exported in the environment
+  credentials = []
+  for i in range(1, 20):
+    api_key = os.environ.get(f'FLICKR_API_KEY_{i}')
+    api_secret = os.environ.get(f'FLICKR_API_SECRET_{i}')
+    if api_key and api_secret:
+      credentials.append((api_key, api_secret))
+  if not credentials:
+    # key was passed on the command line only: use it rather than fail later
+    credentials.append((opt_api_key, opt_api_secret))
+
+  log.info(f'Shuffling between: {len(credentials)} api keys')
+
+  # read in CSV
+  # | username, ... |
+  df_records = pd.read_csv(opt_fp_in)
+  log.info(f'Dedpuplicating {len(df_records)}')
+  df_records = df_records.drop_duplicates(subset='url', keep="last")
+  log.info(f'Dedpuplicated {len(df_records)}')
+  records = df_records.to_dict('records')
+
+  if opt_slice:
+    records = records[opt_slice[0]:opt_slice[1]]
+
+  log.info('Processing: {:,} items'.format(len(records)))
+
+  tags = ['join_date', 'occupation', 'hometown', 'first_name', 'last_name']
+  tags += ['profile_description', 'city', 'country', 'twitter', 'facebook', 'instagram']
+
+  for record in tqdm(records):
+    nsid = record.get('nsid', None)
+    # an empty CSV cell is read as NaN, which is truthy: test for it explicitly
+    if not nsid or pd.isna(nsid):
+      log.warn(f'No NSID for {record["url"]}')
+      continue
+    # bound before the try so the except handler can always format them
+    error_msg = ''
+    flickr_url = ''
+    try:
+      # shuffle the api keys to avoid rate limiting (the call is unsigned)
+      api_key, _ = choice(credentials)
+      flickr_url = 'https://flickr.com/services/rest/?' + urllib.parse.urlencode({
+        'method': 'flickr.profile.getProfile',
+        'api_key': api_key,
+        'user_id': nsid,
+        'format': 'json',
+        'nojsoncallback': 1,
+      })
+
+      with urllib.request.urlopen(flickr_url) as url:
+        data = json.loads(url.read().decode())
+
+      if data['stat'] == 'fail':
+        error_msg = data["message"]
+        raise Exception(error_msg)
+      elif data['stat'] == 'ok':
+        profile = data.get('profile')
+        for tag in tags:
+          record.setdefault(tag, profile.get(tag))
+    except Exception as e:
+      log.error(f'Exception: {e}, message: {error_msg}, url: {flickr_url}')
+
+    if opt_delay:
+      time.sleep(opt_delay)
+
+  # write data
+  df_records = pd.DataFrame.from_dict(records)
+  df_records.to_csv(opt_fp_out, index=False)
+
+
+"""Example flickr.profile.getProfile response (format=json, nojsoncallback=1):
+{
+  "profile": {
+    "id": "129819216@N03",
+    "nsid": "129819216@N03",
+    "join_date": "1417769829",
+    "occupation": "",
+    "hometown": "",
+    "showcase_set": "72157680742231281",
+    "showcase_set_title": "Profile Showcase",
+    "first_name": "Ambasciata",
+    "last_name": "d'Italia a Praga",
+    "profile_description": "",
+    "city": "",
+    "country": "",
+    "facebook": "",
+    "twitter": "",
+    "tumblr": "",
+    "instagram": "",
+    "pinterest": ""
+  },
+  "stat": "ok"
+}
+"""
+\ No newline at end of file