""" Converts directory of JSON API output files to CSV format """ from glob import glob import os from os.path import join from pathlib import Path import click from app.settings import types from app.utils import click_utils from app.settings import app_cfg as cfg from app.utils import logger_utils import pandas as pd from PIL import Image, ImageOps, ImageFilter from app.utils import file_utils, im_utils query_types = ['nsid_profile', 'nsid_url', 'photo_id'] log = logger_utils.Logger.getLogger() @click.command() @click.option('-i', '--input', 'opt_fp_in', required=True, help='Input directory') @click.option('-o', '--output', 'opt_fp_out', required=True, help='Output file') @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), help='Slice list of files') @click.option('-f', '--force', 'opt_force', is_flag=True, help='Force overwrite') @click.option('-q', '--query', 'opt_query_type', type=click.Choice(query_types), required=True) @click.pass_context def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force, opt_query_type): """Fetches Flickr API for user info. Saves to JSON""" from tqdm import tqdm from glob import glob import json # ------------------------------------------------- # process if Path(opt_fp_out).is_file() and not opt_force: log.error('File exists. Use "--force" to overwrite it') return fp_files = glob(join(opt_fp_in, '*.json')) fp_files = [f for f in fp_files if 'error' not in f] if opt_slice: fp_files = fp_files[opt_slice[0]:opt_slice[1]] log.debug(f'Found {len(fp_files)} files') items = [] for fp_file in tqdm(fp_files): if opt_query_type == 'photo_id': try: photo = file_utils.load_json(fp_file).get('photo') except Exception as e: log.error(f'{e}, skipping: {fp_file}') continue dates = photo.get('dates') posted = dates.get('posted') taken = dates.get('taken') description = photo.get('description').get('_content') location = photo.get('location', {}) country = location.get('country', {}) location_country = country.get('_country', '') location_place = country.get('place_id', '') location_woeid = country.get('woeid', '') location_lat = location.get('latitude', '') location_lon = location.get('longitude', '') location_place_id = location.get('place_id', '') owner = photo.get('owner') nsid = owner.get('nsid') path_alias = owner.get('path_alias') owner_realname = owner.get('realname') owner_username = owner.get('username') owner_location = owner.get('location') photo_id = Path(fp_file).stem server = photo.get('server') farm = photo.get('farm') secret = photo.get('secret') # https://farm4.staticflickr.com/3214/3036412907_65deee68e2.jpg image_url = f'https://farm{farm}.staticflickr.com/{server}/{photo_id}_{secret}.jpg' obj = { 'posted': posted, 'taken': taken, 'description': description, 'country': location_country, 'place': location_place, 'woeid': location_woeid, 'lat': location_lat, 'lon': location_lon, 'place_id': location_place_id, 'nsid': nsid, 'path_alias': path_alias, 'realname': owner_realname, 'username': owner_username, 'owner_location': owner_location, 'photo_id': photo_id, 'secret': secret, 'url': image_url } elif opt_query_type == 'nsid_profile': obj = file_utils.load_json(fp_file).get('profile') obj.pop('showcase_set') obj.pop('showcase_set_title') obj.pop('pinterest') obj.pop('tumblr') elif opt_query_type == 'nsid_url': obj = file_utils.load_json(fp_file).get('user') elif opt_query_type == 'user_profile': metadata = file_utils.load_json(fp_file).get('photo') owner = metadata.get('owner') path_alias = owner.get('path_alias') nsid = owner.get('nsid') username = owner.get('username') realname = owner.get('realname') description = metadata.get('description').get('_content') title = metadata.get('title').get('_content') location = metadata.get('location') dates = metadata.get('dates') date_taken = dates.get('taken') date_posted = dates.get('posted') fname = Path(fp_file).stem obj = { 'photo_id': fname, 'nsid': nsid, 'path_alias': path_alias, 'username': username, 'realname': realname, 'title': title, 'description': description, 'location': location, 'date_taken': date_taken, 'date_posted': date_posted } items.append(obj) # conver to DataFrame df = pd.DataFrame.from_dict(items) df.to_csv(opt_fp_out, index=False) log.info(f'Wrote {len(df)} to {opt_fp_out}') """ nsid_url { "stat": "ok", "user": { "nsid": "7153718@N04", "url": "https://www.flickr.com/people/babyfish4/" } } """ """ location: of the owner dateuploaded license "dates": "lastupdate": "1416447096" "posted": "1112900873" "taken": "2005-04-06 18:37:38" description: _content: playing in a field title: _content: jessica location: cornwall, uk """ """ { "profile": { "city": null, "country": null, "facebook": "", "first_name": null, "hometown": "", "id": "7153718@N04", "instagram": "", "join_date": "1172669959", "last_name": null, "nsid": "7153718@N04", "occupation": "", "pinterest": "", "profile_description": "", "showcase_set": "72157680616398790", "showcase_set_title": "Profile Showcase", "tumblr": "", "twitter": "" }, "stat": "ok" } """ """ photo_id { "photo": { "comments": { "_content": "0" }, "dates": { "lastupdate": "0", "posted": "1094612969", "taken": "2004-09-04 22:41:18", "takengranularity": "0", "takenunknown": 0 }, "dateuploaded": "1094612969", "description": { "_content": "" }, "editability": { "canaddmeta": 0, "cancomment": 0 }, "farm": 1, "geoperms": { "iscontact": 0, "isfamily": 0, "isfriend": 0, "ispublic": 1 }, "id": "371498", "isfavorite": 0, "license": "1", "location": { "accuracy": "15", "context": "0", "country": { "_content": "United States", "place_id": "nz.gsghTUb4c2WAecA", "woeid": "23424977" }, "county": { "_content": "Tompkins", "place_id": "1uCJJtBQUL80G6hbPw", "woeid": "12589366" }, "latitude": "42.399028", "longitude": "-76.652519", "place_id": "1uCJJtBQUL80G6hbPw", "region": { "_content": "New York", "place_id": "ODHTuIhTUb75gdBu", "woeid": "2347591" }, "woeid": "12589366" }, "media": "photo", "notes": { "note": [] }, "originalformat": "jpg", "originalsecret": "704f392686", "owner": { "iconfarm": 1, "iconserver": "1", "location": "Los Angeles, CA, USA", "nsid": "48600072071@N01", "path_alias": "barb", "realname": "Barb Dybwad", "username": "doctor paradox" }, "people": { "haspeople": 0 }, "publiceditability": { "canaddmeta": 0, "cancomment": 1 }, "rotation": 0, "safety_level": "0", "secret": "704f392686", "server": "1", "tags": { "tag": [ { "_content": "unfound", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-9017", "machine_tag": 0, "raw": "unfound" }, { "_content": "digicam", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-40406", "machine_tag": 0, "raw": "digicam" }, { "_content": "upstateny", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-9655", "machine_tag": 0, "raw": "upstateny" }, { "_content": "musefest", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-72456", "machine_tag": 0, "raw": "musefest" }, { "_content": "musicfestival", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-72628", "machine_tag": 0, "raw": "musicfestival" }, { "_content": "people", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-290", "machine_tag": 0, "raw": "people" }, { "_content": "portrait", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-278", "machine_tag": 0, "raw": "portrait" }, { "_content": "maco", "author": "48600072071@N01", "authorname": "doctor paradox", "id": "28255-371498-19439", "machine_tag": 0, "raw": "maco" } ] }, "title": { "_content": "maco2" }, "urls": { "url": [ { "_content": "https://www.flickr.com/photos/barb/371498/", "type": "photopage" } ] }, "usage": { "canblog": 0, "candownload": 1, "canprint": 0, "canshare": 1 }, "views": "290", "visibility": { "isfamily": 0, "isfriend": 0, "ispublic": 1 } }, "stat": "ok" } """