""" Converts directory of JSON API output files to CSV format """ from glob import glob import os from os.path import join from pathlib import Path import click from app.settings import types from app.utils import click_utils from app.settings import app_cfg as cfg from app.utils import logger_utils import pandas as pd from PIL import Image, ImageOps, ImageFilter from app.utils import file_utils, im_utils data_types = ['nsid_url', 'nsid_profile'] log = logger_utils.Logger.getLogger() @click.command() @click.option('-i', '--input', 'opt_fp_in', required=True, help='Input directory') @click.option('-o', '--output', 'opt_fp_out', required=True, help='Output file') @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), help='Slice list of files') @click.option('-q', '--query', 'opt_query', required=True, type=click.Choice(data_types), help='Flickr API data type') @click.option('-f', '--force', 'opt_force', is_flag=True, help='Force overwrite') @click.pass_context def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force, opt_query): """Fetches Flickr API for user info. Saves to JSON""" from tqdm import tqdm from glob import glob import json # ------------------------------------------------- # process if Path(opt_fp_out).is_file() and not opt_force: log.error('File exists. Use "--force" to overwrite it') return fp_files = glob(join(opt_fp_in, '*.json')) fp_files = [f for f in fp_files if 'error' not in f] if opt_slice: fp_files = fp_files[opt_slice[0]:opt_slice[1]] log.debug(f'Found {len(fp_files)} files') items = [] """ { "stat": "ok", "user": { "id": "95216244@N04", "username": { "_content": "AfghanistanWien" } } } """ """ { "profile": { "city": "Oslo", "country": "Norway", "facebook": "", "first_name": "US Embassy", "hometown": "Oslo", "id": "133886098@N05", "instagram": "", "join_date": "1436521589", "last_name": "Oslo", "nsid": "133886098@N05", "occupation": "", "pinterest": "", "profile_description": "This is the official Flickr profile of the U.S. Embassy in Oslo, Norway. Contact us: osloirc@state.gov.", "showcase_set": "72157677372281094", "showcase_set_title": "Profile Showcase", "tumblr": "", "twitter": "", "website": "http://norway.usembassy.gov/index.html" }, "stat": "ok" } """ # Convert to |nsid|username| for fp_file in tqdm(fp_files): metadata = file_utils.load_json(fp_file) if opt_query == 'nsid_url': path_alias = Path(fp_file).stem metadata = metadata.get('user') nsid = metadata.get('id') username = metadata.get('username').get('_content') url = f'https://www.flickr.com/photos/{path_alias}' obj = { 'nsid': nsid, 'username': username, 'url': url, 'path_alias': path_alias, 'filename': f'{path_alias}.json' } elif opt_query == 'nsid_profile': obj = metadata.get('profile') items.append(obj) # conver to DataFrame df = pd.DataFrame.from_dict(items) df.to_csv(opt_fp_out, index=False) log.info(f'Wrote {len(df)} to {opt_fp_out}')