1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
"""
Unpack data for:
Z. Bessinger, C. Stauffer, and N. Jacobs, “Who Goes There? Approaches to
Mapping Facial Appearance Diversity,” in Proceedings of the 24th SIGSPATIAL
International Conference on Advances in Geographic Information Systems, 2016.
"""
import click
from app.utils.logger_utils import Logger
# Module-level logger handle obtained from the project's Logger utility.
log = Logger.getLogger()
# Field names offered as the valid choices for the --value option.
keys_all = [
    'accuracy',
    'admin1',
    'admin2',
    'age',
    'capture_device',
    'city',
    'content_length',
    'country_code',
    'date_taken',
    'date_uploaded',
    'description',
    'face',
    'face_bounds',
    'face_key',
    'face_landmarks_f',
    'face_landmarks_o',
    'gender',
    'im_download_url',
    'im_extension_original',
    'im_farm_id',
    'im_id',
    'im_page_url',
    'im_secret',
    'im_secret_original',
    'im_server_id',
    'index',
    'latitude',
    'license_name',
    'license_url',
    'longitude',
    'machine_tags',
    'title',
    'user_nickname',
    'user_nsid',
    'user_tags',
]

# Separate list object with the same entries as keys_all.
# NOTE(review): presumably meant to be trimmed to the fields worth keeping;
# it is not referenced anywhere in the visible code -- confirm before pruning.
keys_keep = list(keys_all)
@click.command()
@click.pass_context
@click.option('-i', '--input', 'opt_fp_in', required=True)
@click.option('-o', '--output', 'opt_fp_out', required=True)
@click.option('--value', 'opt_value', required=True, type=click.Choice(keys_all))
def cli(ctx, opt_fp_in, opt_fp_out, opt_value):
    """Convert WhoGoesThere HDF5"""
    # Purpose: collect the distinct values stored under one field of the HDF5
    # file and write them to a text file, one value per line.
    #
    #   ctx        -- click context (injected by @click.pass_context, unused)
    #   opt_fp_in  -- path of the HDF5 file to read
    #   opt_fp_out -- path of the text file to write (overwritten)
    #   opt_value  -- name of the field to read; one of keys_all
    #
    # Only the imports this command actually needs are loaded here, so the
    # command does not fail on unrelated, unused packages.
    import h5py
    from tqdm import tqdm

    log = Logger.getLogger()
    log.info('Uncompress HDF5')

    # Upper bound on the number of records scanned (same cap as before).
    max_items = 99999999

    unique_vals = set()
    with h5py.File(opt_fp_in, 'r') as h5_file:
        # The record count is taken from the 'face' field, whichever field is
        # being exported.
        num_items = len(h5_file['face'])
        log.info(f'items: {num_items:,}')

        # Look the field up once instead of on every iteration.
        dataset = h5_file[opt_value]
        for idx in tqdm(range(min(max_items, num_items))):
            raw = dataset[idx]
            # Byte strings are decoded as UTF-8 (the bytes.decode default);
            # anything else is stringified instead of raising AttributeError.
            # NOTE(review): which fields hold non-bytes values is not visible
            # here -- confirm the str() form is what you want for them.
            unique_vals.add(raw.decode() if isinstance(raw, bytes) else str(raw))

    # Sorted so the output is reproducible between runs; UTF-8 to match the
    # decoding above regardless of the platform's default encoding.
    with open(opt_fp_out, 'w', encoding='utf-8') as fp_out:
        fp_out.writelines(f'{val}\n' for val in sorted(unique_vals))
|