summary refs log tree commit diff
path: root/megapixels/commands/datasets/whogoesthere.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/whogoesthere.py')
-rw-r--r-- megapixels/commands/datasets/whogoesthere.py 72
1 files changed, 72 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/whogoesthere.py b/megapixels/commands/datasets/whogoesthere.py
new file mode 100644
index 00000000..6cf9f009
--- /dev/null
+++ b/megapixels/commands/datasets/whogoesthere.py
@@ -0,0 +1,72 @@
+"""
+Unpack data for:
+
+Z. Bessinger, C. Stauffer, and N. Jacobs, “Who Goes There? Approaches to
+Mapping Facial Appearance Diversity,” in Proceedings of the 24th SIGSPATIAL
+International Conference on Advances in Geographic Information Systems, 2016.
+"""
+
+import click
+
+from app.utils.logger_utils import Logger
+
+log = Logger.getLogger()
+
+keys_all = ['accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city',
+ 'content_length', 'country_code', 'date_taken', 'date_uploaded',
+ 'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f',
+ 'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original',
+ 'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original',
+ 'im_server_id', 'index', 'latitude', 'license_name', 'license_url', 'longitude',
+ 'machine_tags', 'title', 'user_nickname', 'user_nsid', 'user_tags']
+
+keys_keep = ['accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city',
+ 'content_length', 'country_code', 'date_taken', 'date_uploaded',
+ 'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f',
+ 'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original',
+ 'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original',
+ 'im_server_id', 'index', 'latitude', 'license_name', 'license_url', 'longitude',
+ 'machine_tags', 'title', 'user_nickname', 'user_nsid', 'user_tags']
+
+@click.command()
+@click.pass_context
+@click.option('-i', '--input', 'opt_fp_in', required=True)
+@click.option('-o', '--output', 'opt_fp_out', required=True)
+@click.option('--value', 'opt_value', required=True, type=click.Choice(keys_all))
+def cli(ctx, opt_fp_in, opt_fp_out, opt_value):
+ """Convert WhoGoesThere HDF5"""
+
+ import sys
+ from glob import glob
+ from os.path import join
+ from pathlib import Path
+ import time
+
+ import pandas as pd
+ import h5py
+ from scipy import misc
+ from io import BytesIO
+ from base64 import b64decode
+ from tqdm import tqdm
+
+ log = Logger.getLogger()
+ log.info('Uncompress HDF5')
+
+ key_vals = []
+
+ with h5py.File(opt_fp_in, 'r') as fp:
+ num_items = len(fp['face'])
+ log.info(f'items: {num_items:,}')
+
+ for idx in tqdm(range(0, min(99999999,num_items))):
+ # face_str = fp['face'][0]
+ # face_im = misc.imread(BytesIO(b64decode(face_str)))
+ # print(fo['face_landmarks_f/x'][0])
+ # age = fp['age'][idx].decode()
+ key_val = fp[opt_value][idx].decode()
+ key_vals.append(key_val)
+
+ key_vals = set(key_vals)
+ with open(opt_fp_out, 'w') as fp:
+ for key_val in key_vals:
+ fp.write(f'{key_val}\n') \ No newline at end of file