import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg


@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
              help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              # Help must describe the DataStore choices, not the Dataset ones.
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset):
    """Symlinks images to new directory for S3
    \f
    Everything after the form feed above is hidden from ``--help`` by click.

    Reads the file-record CSV, then for every row creates a symlink at the
    data store's UUID path pointing at the row's source image path.

    Args:
        ctx: click context (unused; supplied by ``@click.pass_context``).
        opt_fp_in: optional path to a records CSV; when None the path comes
            from ``data_store.metadata(types.Metadata.FILE_RECORD)``.
        opt_fp_out: optional output directory; when None the directory comes
            from ``data_store.uuid_dir()``.
        opt_data_store: data store selector passed to ``DataStore``.
        opt_dataset: dataset selector passed to ``DataStore``.

    The CSV must provide the columns ``index``, ``subdir``, ``fn``, ``ext``
    and ``uuid``, since those are the fields read below.
    """
    # Imports are kept function-local, as in the original command.
    from pathlib import Path

    from tqdm import tqdm
    import pandas as pd

    from app.utils import logger_utils, file_utils
    from app.models.data_store import DataStore

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    # set data_store
    data_store = DataStore(opt_data_store, opt_dataset)

    # Honor the -i override; previously the option was accepted but ignored.
    if opt_fp_in is None:
        fp_records = data_store.metadata(types.Metadata.FILE_RECORD)
    else:
        fp_records = opt_fp_in
    df_records = pd.read_csv(fp_records).set_index('index')
    nrows = len(df_records)

    dir_out = data_store.uuid_dir() if opt_fp_out is None else opt_fp_out
    file_utils.mkdirs(dir_out)

    n_linked = 0
    n_skipped = 0
    for ds_record in tqdm(df_records.itertuples(), total=nrows):
        # make image path
        fp_src = data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext)
        fp_dst = Path(data_store.face_uuid(ds_record.uuid, ds_record.ext))
        if opt_fp_out is not None:
            # Place the link inside the directory that was actually created.
            # NOTE(review): assumes the last path component returned by
            # face_uuid() is the link's file name - confirm against DataStore.
            fp_dst = Path(dir_out) / fp_dst.name
        try:
            fp_dst.symlink_to(Path(fp_src))
        except FileExistsError:
            # A previous run already created this entry; skip it so the
            # command can be re-run instead of aborting on the first file.
            n_skipped += 1
        else:
            n_linked += 1

    # Report what was really created rather than the number of CSV rows.
    log.info('symlinked {:,} files'.format(n_linked))
    if n_skipped:
        log.info('skipped {:,} existing files'.format(n_skipped))