diff options
Diffstat (limited to 'megapixels/commands/datasets/symlink.py')
| -rw-r--r-- | megapixels/commands/datasets/symlink.py | 45 |
1 files changed, 45 insertions, 0 deletions
# megapixels/commands/datasets/symlink.py
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg


# CLI command: for every row of the records CSV, create a symlink in the
# output directory named "<uuid>.<ext>" that points at the media file
# "<media>/<subdir>/<fn>.<ext>".
#
# Options:
#   -i/--input   path to the records CSV; the columns read below are
#                'subdir', 'fn', 'ext' and 'uuid'
#   -m/--media   root directory containing the source media files
#   -o/--output  directory the symlinks are created in (created via
#                file_utils.mkdirs before linking)
#
# Raises whatever Path.symlink_to raises (e.g. FileExistsError when a link
# with the same name is already present); nothing is caught here.
#
# NOTE: the docstring below is left short on purpose -- click displays it
# as the command's --help text.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Input records CSV')
@click.option('-m', '--media', 'opt_fp_media', required=True,
              help='Input media directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
              help='Output directory')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_media, opt_fp_out):
    """Symlinks images to new directory for S3"""

    from os.path import join
    from pathlib import Path

    from tqdm import tqdm
    import pandas as pd

    from app.utils import logger_utils, file_utils

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    df_records = pd.read_csv(opt_fp_in)
    nrows = len(df_records)

    file_utils.mkdirs(opt_fp_out)

    # iterrows() already yields each row as a Series, so use it directly
    # instead of looking the row up a second time by position.
    for _, row in tqdm(df_records.iterrows(), total=nrows):
        ext = row['ext']

        # make image path
        fn_src = '{}.{}'.format(row['fn'], ext)
        fn_dst = '{}.{}'.format(row['uuid'], ext)

        # A relative symlink target is interpreted relative to the directory
        # that holds the link, not the current working directory. Anchor the
        # source to an absolute path so the link is valid even when --media
        # was given as a relative path.
        fpp_src = Path(join(opt_fp_media, row['subdir'], fn_src)).absolute()
        fpp_dst = Path(join(opt_fp_out, fn_dst))
        fpp_dst.symlink_to(fpp_src)

    log.info('symlinked {:,} files'.format(nrows))
\ No newline at end of file |
