"""
# Examples:
-q https://farm2.staticflickr.com/1252/1366994858_d4a2b377cc_o.jpg
-q 48876008@N05
-q 1366994858
"""
import click
# Flickr-derived people/face datasets to search; each is expected to ship a
# metadata/<name>_filepaths.csv index under fp_dataset_base (see cli below).
dataset_keys = ['pipa', 'megaface', 'helen', 'ibm_dif', 'adience', 'who_goes_there', 'vgg_face']
@click.command()
@click.option('-q', '--query', 'opt_query', type=str, required=True,
    help='Photo URL, photo id, or NSID')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output file CSV')
@click.pass_context
def cli(ctx, opt_query, opt_fp_out):
    """Locate image by Flickr identifier"""
    from os.path import join
    from pathlib import Path

    import pandas as pd

    from app.utils.logger_utils import Logger

    log = Logger.getLogger()
    log.debug(f'Search query: "{opt_query}"')

    fp_dataset_base = '/data_store/datasets/people/'
    matches = []

    # Classify the query: an NSID contains '@'; a staticflickr URL encodes the
    # photo id as the first '_'-separated token of the filename; anything else
    # is treated as a bare photo id.
    if '@' in opt_query:
        qk = 'nsid'
    elif 'staticflickr.com' in opt_query:
        opt_query = Path(opt_query).name.split('_')[0]
        qk = 'photo_id'
    else:
        qk = 'photo_id'
    log.debug(f'Searching Flickr data using "{qk}"')

    for dk in dataset_keys:
        # Read this dataset's filepath index. photo_id is forced to str so ids
        # compare as text against the (string) query and keep leading zeros.
        fp_filepaths = join(fp_dataset_base, dk, f'metadata/{dk}_filepaths.csv')
        log.debug(f'loading: {fp_filepaths}')
        df = pd.read_csv(fp_filepaths, dtype={'photo_id': str})

        # Filter once; the original built a list for an O(n) membership test
        # and then re-filtered the DataFrame with the same predicate.
        df_match = df[df[qk] == opt_query]
        if not df_match.empty:
            log.info(f'Found "{qk} = {opt_query}" id in "{dk}"')
            for record in df_match.to_dict('records'):
                record['dataset'] = dk  # tag provenance so rows remain traceable
                matches.append(record)

    # Write file (an empty CSV is written when nothing matched)
    log.debug(f'Found {len(matches)} matches')
    pd.DataFrame(matches).to_csv(opt_fp_out, index=False)