summary | refs | log | tree | commit | diff
path: root/megapixels/commands/search/flickr.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/search/flickr.py')
-rw-r--r-- megapixels/commands/search/flickr.py 71
1 files changed, 71 insertions, 0 deletions
diff --git a/megapixels/commands/search/flickr.py b/megapixels/commands/search/flickr.py
new file mode 100644
index 00000000..ef3515bf
--- /dev/null
+++ b/megapixels/commands/search/flickr.py
@@ -0,0 +1,71 @@
+"""
+# Examples (each call also needs the required -o/--output CSV path):
+-q https://farm2.staticflickr.com/1252/1366994858_d4a2b377cc_o.jpg
+-q 48876008@N05
+-q 1366994858
+"""
+
+import click
+
+# datasets to search; each key names a folder holding metadata/<key>_filepaths.csv
+dataset_keys = ['pipa', 'megaface', 'helen', 'ibm_dif', 'adience', 'who_goes_there', 'vgg_face']
+
+@click.command()
+@click.option('-q', '--query', 'opt_query', type=str, required=True,
+    help='Photo URL, photo id, or NSID')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+    help='Output file CSV')
+@click.pass_context
+def cli(ctx, opt_query, opt_fp_out):
+    """Locate image by Flickr identifier"""
+
+    # NOTE: click renders the docstring above as the command's --help text.
+    # Resolves the query to an 'nsid' or 'photo_id' column lookup, scans each
+    # dataset's metadata/<key>_filepaths.csv for rows equal to it, and writes
+    # every matching row (plus a 'dataset' column) to opt_fp_out as CSV.
+    # The CSV is written even when nothing matched.
+    # ctx is injected by click.pass_context and is not used here.
+
+    from os.path import join
+    from pathlib import Path
+
+    import pandas as pd
+
+    from app.utils.logger_utils import Logger
+
+    log = Logger.getLogger()
+    log.debug(f'Search query: "{opt_query}"')
+
+    fp_dataset_base = '/data_store/datasets/people/'
+
+    if '@' in opt_query:
+        # queries containing '@' are treated as NSIDs (e.g. 48876008@N05)
+        qk = 'nsid'
+    else:
+        qk = 'photo_id'
+        if 'staticflickr.com' in opt_query:
+            # e.g. .../1366994858_d4a2b377cc_o.jpg -> 1366994858
+            opt_query = Path(opt_query).name.split('_')[0]
+
+    log.debug(f'Searching Flickr data using "{qk}"')
+
+    matches = []
+    for dk in dataset_keys:
+        # read dataset metadata
+        fp_filepaths = join(fp_dataset_base, dk, f'metadata/{dk}_filepaths.csv')
+        log.debug(f'loading: {fp_filepaths}')
+        # photo ids are forced to str so they compare equal to the CLI string
+        df = pd.read_csv(fp_filepaths, dtype={'photo_id': str})
+
+        # NOTE(review): raises KeyError if this dataset's CSV has no `qk` column
+        # one vectorized comparison both detects and selects the matches
+        df_match = df[df[qk] == opt_query]
+        if df_match.empty:
+            continue
+        log.info(f'Found "{qk} = {opt_query}" id in "{dk}"')
+        # tag each matching row with the dataset it came from
+        matches.extend(df_match.assign(dataset=dk).to_dict('records'))
+
+    # Write file
+    log.debug(f'Found {len(matches)} matches')
+    pd.DataFrame(matches).to_csv(opt_fp_out, index=False)
\ No newline at end of file