summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/flickr_api.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/flickr_api.py')
-rw-r--r--megapixels/commands/datasets/flickr_api.py202
1 files changed, 202 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/flickr_api.py b/megapixels/commands/datasets/flickr_api.py
new file mode 100644
index 00000000..780ede49
--- /dev/null
+++ b/megapixels/commands/datasets/flickr_api.py
@@ -0,0 +1,202 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+# Choices accepted by the --query-type option of the command below
+query_types = ['photo_id', 'album_id', 'flickr_id']
+# flickr_id: 123456789@N01
+# photo_id: e.g. 6796778203
+
+# Logger used by the command below for info/error messages
+log = logger_utils.Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+ help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+ help='Output directory')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+ help='Slice list of files')
+@click.option('--query-type', 'opt_query_type', default='photo_id',
+ type=click.Choice(query_types),
+ help='API query type')
+@click.option('--api_key', 'opt_api_key', envvar='FLICKR_API_KEY_1')
+@click.option('--api_secret', 'opt_api_secret', envvar='FLICKR_API_SECRET_1')
+@click.option('-d', '--delay', 'opt_delay', default=None, type=float,
+ help='Delay between API calls to prevent rate-limiting')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_api_key, opt_api_secret,
+ opt_delay, opt_query_type):
+ """Fetches Flickr API for user info. Saves to JSON"""
+
+ from tqdm import tqdm
+ from glob import glob
+ import time
+ import json
+ import os, sys
+ from random import randint
+ import urllib.request
+ import flickr_api # pip install flickr_api
+ from flickr_api.flickrerrors import FlickrAPIError
+ from requests.compat import urljoin, quote_plus
+
+
+ # -------------------------------------------------
+ # process
+
+ if not opt_api_key or not opt_api_secret:
+ log.error('source .env vars for Flickr API and try again')
+ return
+
+ # check how many flickr keys
+ api_keys = []
+ api_secrets = []
+ for i in range(1,10):
+ try:
+ var_name_key = f'FLICKR_API_KEY_{i}'
+ var_name_secret = f'FLICKR_API_SECRET_{i}'
+ if os.environ[var_name_key] and os.environ[var_name_secret]:
+ api_keys.append(os.environ[var_name_key])
+ api_secrets.append(os.environ[var_name_secret])
+ except Exception as e:
+ pass
+
+ log.info(f'Shuffling between: {len(api_keys)} api keys')
+
+ # read in CSV
+ # | query, filepath |
+
+ records = pd.read_csv(opt_fp_in).to_dict('records')
+ if opt_slice:
+ records = records[opt_slice[0]:opt_slice[1]]
+
+ log.info('Processing: {:,} items'.format(len(records)))
+
+ identities = []
+
+
+ for record in tqdm(records):
+ fp_out = join(opt_fp_out, record['filepath'])
+ fp_out_err = fp_out + '_error.txt'
+ if Path(fp_out).is_file() or Path(fp_out_err).is_file():
+ continue
+ # append relevant data
+ try:
+ # shuffle the api keys to avoid rate limiting
+ rand_int = randint(0,len(api_keys)-1)
+ api_key = api_keys[rand_int]
+ api_secret = api_secrets[rand_int]
+
+ #flickr_api.set_keys(api_key=api_key, api_secret=api_secret)
+
+ #photo = flickr_api.Photo(id=record['query'])
+ # https://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=18dacfe5a2c45b794ee4aa832eb35e83&photo_id=6796778203&format=json&nojsoncallback=1
+ photo_id = record['query']
+ flickr_url = 'https://api.flickr.com/services/rest/?method=flickr.photos.getInfo'
+ flickr_url += f'&api_key={api_key}'
+ flickr_url += f'&photo_id={photo_id}'
+ flickr_url += '&format=json'
+ flickr_url += '&nojsoncallback=1'
+
+ with urllib.request.urlopen(flickr_url) as url:
+ data = json.loads(url.read().decode())
+
+ if data['stat'] =='fail':
+ raise Exception('failed')
+ elif data['stat'] =='ok':
+ with open(fp_out, 'w') as fp:
+ json.dump(data, fp, sort_keys=True, indent=2)
+
+ #except FlickrAPIError as e:
+ except Exception as e:
+ # if "HTTP Server Error 500" in str(e):
+ log.error(f'{e}, {record["query"]}, api_key: {api_key}, api_secret: {api_secret}')
+ if "not found" in str(e) or 'failed' in str(e):
+ with open(fp_out_err, 'w') as fp:
+ fp.write('')
+
+ if opt_delay:
+ time.sleep(opt_delay)
+
+
+
+"""
+df_id_dict.update( {
+ 'user_name': info.get('username', ''),
+ 'location': info.get('location', ''),
+ 'real_name': info.get('realname', ''),
+ 'time_zone': info.get('timezone', {}).get('timezone_id', ''),
+ 'time_first_photo': info.get('photos_info', {}).get('firstdatetaken'),
+ 'photos_count': info.get('photos_info', {}).get('count'),
+ 'description': info.get('description', ''),
+ 'id': info.get('id'),
+ 'path_alias': info.get('path_alias', ''),
+ 'is_pro': info.get('ispro', ''),
+ 'url_photos': info.get('photosurl', ''),
+ 'url_profile': info.get('profileurl', ''),
+ 'url_mobile': info.get('mobileurl', ''),
+ })
+"""
+
+"""
+info = photo.getInfo()
+
+# serialize tags
+info['tag_names'] = []
+info['tag_ids'] = []
+tags = info['tags']
+for t in tags:
+ info['tag_names'].append(str(t.text))
+ info['tag_ids'].append(str(t.id))
+
+owner = info['owner']
+info['owner_id'] = str(owner.id)
+info['owner_username'] = str(owner.username)
+
+info.pop('tags')
+info.pop('owner')
+
+"""
+
+"""
+Example API data:
+{'id': '7124086@N07',
+ 'nsid': '7124086@N07',
+ 'ispro': 1,
+ 'can_buy_pro': 0,
+ 'iconserver': '2325',
+ 'iconfarm': 3,
+ 'path_alias': 'shirleylin',
+ 'has_stats': '1',
+ 'pro_badge': 'standard',
+ 'expire': '0',
+ 'username': 'ShirleyLin',
+ 'realname': 'Shirley Lin',
+ 'location': 'Fremont, California, US',
+ 'timezone': {'label': 'Pacific Time (US & Canada); Tijuana',
+ 'offset': '-08:00',
+ 'timezone_id': 'PST8PDT'},
+ 'description': '',
+ 'photosurl': 'https://www.flickr.com/photos/shirleylin/',
+ 'profileurl': 'https://www.flickr.com/people/shirleylin/',
+ 'mobileurl': 'https://m.flickr.com/photostream.gne?id=7102756',
+ 'photos_info': {'firstdatetaken': '2004-05-24 12:12:15',
+ 'firstdate': '1172556588',
+ 'count': 9665}}
+"""
+
+"""
+https://www.flickr.com/services/api/explore/flickr.photosets.getPhotos
+https://www.flickr.com/services/api/explore/flickr.photos.getInfo
+""" \ No newline at end of file