"""
Unpack data for:
Z. Bessinger, C. Stauffer, and N. Jacobs, “Who Goes There? Approaches to Mapping
Facial Appearance Diversity,” in Proceedings of the 24th SIGSPATIAL International
Conference on Advances in Geographic Information Systems, 2016.
"""

import click

from app.utils.logger_utils import Logger

log = Logger.getLogger()

# Keys selectable through the --value option.
keys_all = [
    'accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city',
    'content_length', 'country_code', 'date_taken', 'date_uploaded',
    'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f',
    'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original',
    'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original',
    'im_server_id', 'index', 'latitude', 'license_name', 'license_url',
    'longitude', 'machine_tags', 'title', 'user_nickname', 'user_nsid',
    'user_tags',
]

# Currently identical to keys_all and not referenced in this file; kept as a
# separate list in case other modules import it.
keys_keep = [
    'accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city',
    'content_length', 'country_code', 'date_taken', 'date_uploaded',
    'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f',
    'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original',
    'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original',
    'im_server_id', 'index', 'latitude', 'license_name', 'license_url',
    'longitude', 'machine_tags', 'title', 'user_nickname', 'user_nsid',
    'user_tags',
]


def _to_text(value):
    """Return one HDF5 element as text, decoding it first if it is a byte string."""
    # Byte strings are decoded (UTF-8 by default). Anything else -- e.g. an
    # element that is already ``str`` or a numeric scalar -- has no usable
    # ``.decode()`` and is converted with ``str()`` instead of raising
    # AttributeError.
    if isinstance(value, bytes):
        return value.decode()
    return str(value)


@click.command()
@click.pass_context
@click.option('-i', '--input', 'opt_fp_in', required=True)
@click.option('-o', '--output', 'opt_fp_out', required=True)
@click.option('--value', 'opt_value', required=True, type=click.Choice(keys_all))
def cli(ctx, opt_fp_in, opt_fp_out, opt_value):
    """Convert WhoGoesThere HDF5"""
    # The docstring above is used by click as the --help text, so the extended
    # documentation lives in this comment instead.
    #
    # Reads the entry named by `opt_value` from the HDF5 file at `opt_fp_in`,
    # collects the distinct values as text, and writes them to `opt_fp_out`,
    # one per line, in sorted order.
    #
    #   ctx        -- click context (unused here)
    #   opt_fp_in  -- path to the input HDF5 file
    #   opt_fp_out -- path of the text file to write (overwritten)
    #   opt_value  -- which key to extract; must be one of `keys_all`
    #
    # NOTE(review): earlier commented-out code in this function indexed
    # 'face_landmarks_f/x' and base64-decoded 'face', which suggests some keys
    # are groups or image payloads rather than simple per-item strings; those
    # keys may not be meaningful with this command -- confirm against the file.

    # Heavy dependencies are imported lazily so that merely loading this
    # command module stays cheap.
    import h5py
    from tqdm import tqdm

    log = Logger.getLogger()
    log.info('Uncompress HDF5')

    # Upper bound on the number of items scanned; presumably a leftover debug
    # limit, kept so behavior on very large files is unchanged.
    max_items = 99999999

    unique_vals = set()
    with h5py.File(opt_fp_in, 'r') as h5_file:
        # The item count is taken from the 'face' entry, not from `opt_value`.
        num_items = len(h5_file['face'])
        log.info(f'items: {num_items:,}')
        # Look the entry up once instead of on every iteration.
        dataset = h5_file[opt_value]
        for idx in tqdm(range(min(max_items, num_items))):
            unique_vals.add(_to_text(dataset[idx]))

    # Sort so the output is reproducible between runs (set iteration order is
    # not), and write as UTF-8 to match the decoding used above.
    with open(opt_fp_out, 'w', encoding='utf-8') as fp_out:
        fp_out.writelines(f'{val}\n' for val in sorted(unique_vals))