""" # Examples: -q https://farm2.staticflickr.com/1252/1366994858_d4a2b377cc_o.jpg -q 48876008@N05 -q 1366994858 """ import click # datasets dataset_keys = ['pipa', 'megaface', 'helen', 'ibm_dif', 'adience', 'who_goes_there', 'vgg_face'] @click.command() @click.option('-q', '--query', 'opt_query', type=str, required=True, help='Photo URL, photo id, or NSID') @click.option('-o', '--output', 'opt_fp_out', required=True, help='Output file CSV') @click.pass_context def cli(ctx, opt_query, opt_fp_out): """Locate image by Flickr identifier""" import sys from os.path import join from glob import glob from pathlib import Path import time import pandas as pd from tqdm import tqdm from app.utils.logger_utils import Logger log = Logger.getLogger() log.debug(f'Search query: "{opt_query}"') fp_dataset_base = '/data_store/datasets/people/' matches = [] if '@' in opt_query: # process NSID format qk = 'nsid' elif 'staticflickr.com' in opt_query: # process URL to photo id opt_query = Path(opt_query).name.split('_')[0] qk = 'photo_id' else: # process as photo id qk = 'photo_id' log.debug(f'Searching Flickr data using "{qk}"') for dk in dataset_keys: # read dataset metadata fp_filepaths = join(fp_dataset_base, dk, f'metadata/{dk}_filepaths.csv') log.debug(f'loading: {fp_filepaths}') df = pd.read_csv(fp_filepaths, dtype={'photo_id': str}) photo_ids = list(df[qk]) # search for match if opt_query in photo_ids: log.info(f'Found "{qk} = {opt_query}" id in "{dk}"') df_match = df[df[qk] == opt_query] records = df_match.to_dict('records') for record in records: record['dataset'] = dk matches.append(record) # Write file log.debug(f'Found {len(matches)} matches') pd.DataFrame.from_dict(matches).to_csv(opt_fp_out, index=False)