summaryrefslogtreecommitdiff
path: root/megapixels/commands/msc/embassy_flickr_api_data_to_csv.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2019-05-29 15:24:30 +0200
committeradamhrv <adam@ahprojects.com>2019-05-29 15:24:30 +0200
commit5b916111ee1a012650a586ec07bc9150d66020bc (patch)
tree128092857e6a9b6d67877e55e05da4f99ea2f5eb /megapixels/commands/msc/embassy_flickr_api_data_to_csv.py
parentf5141a7b48ee569089b07428bc75cb84a55c4834 (diff)
add MSC nbs and cli cmds
Diffstat (limited to 'megapixels/commands/msc/embassy_flickr_api_data_to_csv.py')
-rw-r--r--megapixels/commands/msc/embassy_flickr_api_data_to_csv.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/megapixels/commands/msc/embassy_flickr_api_data_to_csv.py b/megapixels/commands/msc/embassy_flickr_api_data_to_csv.py
new file mode 100644
index 00000000..1a0b6a91
--- /dev/null
+++ b/megapixels/commands/msc/embassy_flickr_api_data_to_csv.py
@@ -0,0 +1,120 @@
+"""
+Converts directory of JSON API output files to CSV format
+"""
+
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+# Values accepted by the -q/--query option; each one selects how a JSON file
+# is turned into a CSV row inside cli()
+data_types = ['nsid_url', 'nsid_profile']
+
+# Module-level logger shared by the command below
+log = logger_utils.Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+ help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+ help='Output file')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+ help='Slice list of files')
+@click.option('-q', '--query', 'opt_query', required=True, type=click.Choice(data_types),
+ help='Flickr API data type')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+ help='Force overwrite')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_force, opt_query):
+ """Fetches Flickr API for user info. Saves to JSON"""
+
+ from tqdm import tqdm
+ from glob import glob
+ import json
+
+
+ # -------------------------------------------------
+ # process
+ if Path(opt_fp_out).is_file() and not opt_force:
+ log.error('File exists. Use "--force" to overwrite it')
+ return
+
+ fp_files = glob(join(opt_fp_in, '*.json'))
+ fp_files = [f for f in fp_files if 'error' not in f]
+ if opt_slice:
+ fp_files = fp_files[opt_slice[0]:opt_slice[1]]
+
+ log.debug(f'Found {len(fp_files)} files')
+ items = []
+ """
+ {
+ "stat": "ok",
+ "user": {
+ "id": "95216244@N04",
+ "username": {
+ "_content": "AfghanistanWien"
+ }
+ }
+ }
+ """
+ """
+ {
+ "profile": {
+ "city": "Oslo",
+ "country": "Norway",
+ "facebook": "",
+ "first_name": "US Embassy",
+ "hometown": "Oslo",
+ "id": "133886098@N05",
+ "instagram": "",
+ "join_date": "1436521589",
+ "last_name": "Oslo",
+ "nsid": "133886098@N05",
+ "occupation": "",
+ "pinterest": "",
+ "profile_description": "This is the official Flickr profile of the U.S. Embassy in Oslo, Norway. Contact us: osloirc@state.gov.",
+ "showcase_set": "72157677372281094",
+ "showcase_set_title": "Profile Showcase",
+ "tumblr": "",
+ "twitter": "",
+ "website": "http://norway.usembassy.gov/index.html"
+ },
+ "stat": "ok"
+ }
+ """
+ # Convert to |nsid|username|
+ for fp_file in tqdm(fp_files):
+ metadata = file_utils.load_json(fp_file)
+
+ if opt_query == 'nsid_url':
+ path_alias = Path(fp_file).stem
+ metadata = metadata.get('user')
+ nsid = metadata.get('id')
+ username = metadata.get('username').get('_content')
+ url = f'https://www.flickr.com/photos/{path_alias}'
+ obj = {
+ 'nsid': nsid,
+ 'username': username,
+ 'url': url,
+ 'path_alias': path_alias,
+ 'filename': f'{path_alias}.json'
+ }
+ elif opt_query == 'nsid_profile':
+ obj = metadata.get('profile')
+
+ items.append(obj)
+
+
+ # conver to DataFrame
+ df = pd.DataFrame.from_dict(items)
+ df.to_csv(opt_fp_out, index=False)
+ log.info(f'Wrote {len(df)} to {opt_fp_out}') \ No newline at end of file