diff options
Diffstat (limited to 'megapixels/commands/search/flickr.py')
| -rw-r--r-- | megapixels/commands/search/flickr.py | 71 |
1 files changed, 71 insertions, 0 deletions
"""
Search the per-dataset metadata CSVs for a Flickr photo id or NSID.

# Examples:
-q https://farm2.staticflickr.com/1252/1366994858_d4a2b377cc_o.jpg
-q 48876008@N05
-q 1366994858
"""

import click

# datasets
dataset_keys = ['pipa', 'megaface', 'helen', 'ibm_dif', 'adience', 'who_goes_there', 'vgg_face']


@click.command()
@click.option('-q', '--query', 'opt_query', type=str, required=True,
              help='Photo URL, photo id, or NSID')
@click.option('-o', '--output', 'opt_fp_out', required=True,
              help='Output file CSV')
@click.pass_context
def cli(ctx, opt_query, opt_fp_out):
    """Locate image by Flickr identifier"""
    # NOTE: the docstring above is left unchanged because click presumably
    # surfaces it as this command's --help text; details are kept in comments.
    #
    # opt_query:  a staticflickr.com photo URL, a bare photo id, or an NSID
    #             (recognised by the '@' it contains).
    # opt_fp_out: path of the CSV file that receives every matching metadata
    #             row, each tagged with the key of the dataset it came from.
    #
    # Any error from reading a dataset CSV (missing file, missing column)
    # propagates to the caller; nothing is written in that case.

    # Imports stay function-local, as in the original command.
    from os.path import join
    from pathlib import PurePosixPath
    from urllib.parse import urlparse

    import pandas as pd

    from app.utils.logger_utils import Logger

    log = Logger.getLogger()
    log.debug(f'Search query: "{opt_query}"')

    fp_dataset_base = '/data_store/datasets/people/'

    if '@' in opt_query:
        # process NSID format
        qk = 'nsid'
    elif 'staticflickr.com' in opt_query:
        # process URL to photo id: the id is the part of the file name before
        # the first underscore. Only the URL's path component is inspected so
        # a query string or fragment cannot leak into the id.
        url_path = urlparse(opt_query).path
        opt_query = PurePosixPath(url_path).name.split('_')[0]
        qk = 'photo_id'
    else:
        # process as photo id
        qk = 'photo_id'

    log.debug(f'Searching Flickr data using "{qk}"')

    matches = []
    for dk in dataset_keys:
        # read dataset metadata; photo ids are forced to str so they compare
        # equal to the (string) query instead of being parsed as integers
        fp_filepaths = join(fp_dataset_base, dk, f'metadata/{dk}_filepaths.csv')
        log.debug(f'loading: {fp_filepaths}')
        df = pd.read_csv(fp_filepaths, dtype={'photo_id': str})

        # search for match: a single vectorised comparison both detects and
        # selects the matching rows
        df_match = df[df[qk] == opt_query]
        if df_match.empty:
            continue

        log.info(f'Found "{qk} = {opt_query}" id in "{dk}"')
        matches.extend(
            {**record, 'dataset': dk} for record in df_match.to_dict('records')
        )

    # Write file
    log.debug(f'Found {len(matches)} matches')
    pd.DataFrame.from_dict(matches).to_csv(opt_fp_out, index=False)
\ No newline at end of file |
