diff options
Diffstat (limited to 'megapixels/commands/datasets/whogoesthere.py')
| -rw-r--r-- | megapixels/commands/datasets/whogoesthere.py | 72 |
1 files changed, 72 insertions, 0 deletions
# File: megapixels/commands/datasets/whogoesthere.py
"""
Unpack data for:

Z. Bessinger, C. Stauffer, and N. Jacobs, “Who Goes There? Approaches to
Mapping Facial Appearance Diversity,” in Proceedings of the 24th SIGSPATIAL
International Conference on Advances in Geographic Information Systems, 2016.
"""

import click

from app.utils.logger_utils import Logger

log = Logger.getLogger()

# Top-level HDF5 keys accepted by the --value option of the command below.
keys_all = ['accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city',
            'content_length', 'country_code', 'date_taken', 'date_uploaded',
            'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f',
            'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original',
            'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original',
            'im_server_id', 'index', 'latitude', 'license_name', 'license_url', 'longitude',
            'machine_tags', 'title', 'user_nickname', 'user_nsid', 'user_tags']

# Currently the same keys as keys_all; not referenced by the command below.
keys_keep = list(keys_all)


def _as_text(value):
    """Return *value* as ``str``, decoding it first when it is ``bytes``."""
    if isinstance(value, bytes):
        return value.decode()
    # NOTE(review): non-bytes entries (e.g. numeric fields) are stringified
    # instead of crashing on a missing .decode() -- confirm the stored dtypes.
    return str(value)


@click.command()
@click.pass_context
@click.option('-i', '--input', 'opt_fp_in', required=True)
@click.option('-o', '--output', 'opt_fp_out', required=True)
@click.option('--value', 'opt_value', required=True, type=click.Choice(keys_all))
def cli(ctx, opt_fp_in, opt_fp_out, opt_value):
    """Convert WhoGoesThere HDF5"""
    # Reads every entry of the dataset named by --value from the input HDF5
    # file and writes the distinct values, one per line, to the output file.
    #
    # ctx        -- click context (required by @click.pass_context; unused here)
    # opt_fp_in  -- path of the HDF5 file to read
    # opt_fp_out -- path of the text file to write
    # opt_value  -- name of the HDF5 key to extract (one of keys_all)
    #
    # Raises click.BadParameter when the key does not refer to a dataset.

    # Heavy dependencies are imported lazily, only when the command runs.
    import h5py
    from tqdm import tqdm

    log.info('Uncompress HDF5')

    with h5py.File(opt_fp_in, 'r') as h5_file:
        # Resolve the dataset once instead of on every loop iteration.
        dataset = h5_file[opt_value]
        if not isinstance(dataset, h5py.Dataset):
            # Some keys appear to be groups with nested datasets
            # (presumably e.g. 'face_landmarks_f/x' -- verify against the file);
            # those cannot be indexed by row number.
            raise click.BadParameter(
                f'"{opt_value}" is not a dataset in {opt_fp_in}',
                param_hint='--value')

        # Size the loop from the dataset actually being read so a length
        # mismatch with another key can neither truncate nor overrun it.
        num_items = len(dataset)
        log.info(f'items: {num_items:,}')

        unique_vals = {_as_text(dataset[idx]) for idx in tqdm(range(num_items))}

    # Values are decoded as UTF-8, so write them back as UTF-8 regardless of
    # the locale; sort so that repeated runs produce identical files.
    with open(opt_fp_out, 'w', encoding='utf-8') as fp_out:
        fp_out.writelines(f'{val}\n' for val in sorted(unique_vals))
\ No newline at end of file |
