summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/whogoesthere.py
blob: 6cf9f0097f1966426121583c0ab961e25c4dc596 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
Unpack data for:

Z. Bessinger, C. Stauffer, and N. Jacobs, “Who Goes There? Approaches to 
Mapping Facial Appearance Diversity,” in Proceedings of the 24th SIGSPATIAL 
International Conference on Advances in Geographic Information Systems, 2016.
"""

import click

from app.utils.logger_utils import Logger

log = Logger.getLogger()

# Every entry name that may be requested through the `--value` option of the
# command defined in this module (used as the `click.Choice` list).
# Presumably these mirror the top-level entries of the WhoGoesThere HDF5
# file -- TODO confirm against the dataset.
keys_all = [
  'accuracy',
  'admin1',
  'admin2',
  'age',
  'capture_device',
  'city',
  'content_length',
  'country_code',
  'date_taken',
  'date_uploaded',
  'description',
  'face',
  'face_bounds',
  'face_key',
  'face_landmarks_f',
  'face_landmarks_o',
  'gender',
  'im_download_url',
  'im_extension_original',
  'im_farm_id',
  'im_id',
  'im_page_url',
  'im_secret',
  'im_secret_original',
  'im_server_id',
  'index',
  'latitude',
  'license_name',
  'license_url',
  'longitude',
  'machine_tags',
  'title',
  'user_nickname',
  'user_nsid',
  'user_tags',
]

# Subset of keys to retain. It currently holds exactly the same entries as
# `keys_all`, kept as an independent list object so that mutating one never
# affects the other.
keys_keep = list(keys_all)

@click.command()
@click.pass_context
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Path to the WhoGoesThere HDF5 file to read')
@click.option('-o', '--output', 'opt_fp_out', required=True,
  help='Path to the text file to write (one unique value per line)')
@click.option('--value', 'opt_value', required=True, type=click.Choice(keys_all),
  help='Name of the HDF5 entry whose unique values are exported')
def cli(ctx, opt_fp_in, opt_fp_out, opt_value):
  """Convert WhoGoesThere HDF5"""
  # The docstring above is also what click shows as the command help, so it
  # is kept short; developer notes live in these comments instead.
  #
  # Reads the entry named `opt_value` from the HDF5 file at `opt_fp_in`,
  # collects its distinct values as text, and writes them to `opt_fp_out`,
  # one value per line, sorted so repeated runs give identical files.
  #
  # ctx:        click context injected by `pass_context` (not used here)
  # opt_fp_in:  path of the HDF5 file, opened read-only
  # opt_fp_out: path of the UTF-8 text file to create or overwrite
  # opt_value:  one of `keys_all`
  #
  # Errors from h5py (missing file or entry) and from opening the output
  # file are not caught and propagate to the caller.

  # Heavy third-party imports stay local so that merely loading the command
  # module does not require them; only what is actually used is imported.
  import h5py
  from tqdm import tqdm

  log = Logger.getLogger()
  log.info('Uncompress HDF5')

  unique_vals = set()

  with h5py.File(opt_fp_in, 'r') as fp_h5:
    # NOTE(review): the item count is taken from the 'face' entry, which
    # assumes every entry in the file has the same length -- confirm.
    num_items = len(fp_h5['face'])
    log.info(f'items: {num_items:,}')

    # Look the entry up once instead of on every iteration.
    dataset = fp_h5[opt_value]

    # Legacy hard cap of 99,999,999 items retained from the original code.
    for idx in tqdm(range(min(99999999, num_items))):
      raw = dataset[idx]
      # Byte strings (including numpy byte scalars, which subclass `bytes`)
      # are decoded as UTF-8; anything else is stringified rather than
      # failing on a missing `.decode()`.
      unique_vals.add(raw.decode() if isinstance(raw, bytes) else str(raw))

  # Explicit UTF-8 matches the decoding above and avoids platform-dependent
  # encode errors on non-ASCII values.
  with open(opt_fp_out, 'w', encoding='utf-8') as fp_out:
    fp_out.writelines(f'{val}\n' for val in sorted(unique_vals))