summary | refs | log | tree | commit | diff
path: root/megapixels/commands/datasets/symlink_uuid.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/symlink_uuid.py')
-rw-r--r-- megapixels/commands/datasets/symlink_uuid.py | 57
1 files changed, 57 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/symlink_uuid.py b/megapixels/commands/datasets/symlink_uuid.py
new file mode 100644
index 00000000..7c5faa95
--- /dev/null
+++ b/megapixels/commands/datasets/symlink_uuid.py
@@ -0,0 +1,57 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
              help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output directory')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset):
    """Symlinks images to new directory for S3"""
    # Parameters (all supplied by click):
    #   ctx            -- click context; unused, kept because of @click.pass_context
    #   opt_fp_in      -- optional path to the file-record CSV; when None the path
    #                     comes from data_store.metadata(types.Metadata.FILE_RECORD)
    #   opt_fp_out     -- optional output directory for the symlinks; when None the
    #                     data store's uuid_dir() / face_uuid() locations are used
    #   opt_data_store -- data store selector passed to DataStore
    #   opt_dataset    -- dataset selector passed to DataStore
    #
    # The CSV must provide the columns read below: index, subdir, fn, ext, uuid.

    # Imports are kept function-local, as in the rest of this command.
    from pathlib import Path

    from tqdm import tqdm
    import pandas as pd

    from app.utils import logger_utils, file_utils
    from app.models.data_store import DataStore

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    # set data_store
    data_store = DataStore(opt_data_store, opt_dataset)

    # honor the -i override; otherwise use the dataset's file-record CSV
    if opt_fp_in is None:
        fp_records = data_store.metadata(types.Metadata.FILE_RECORD)
    else:
        fp_records = opt_fp_in
    df_records = pd.read_csv(fp_records).set_index('index')
    nrows = len(df_records)

    dir_out = data_store.uuid_dir() if opt_fp_out is None else opt_fp_out
    file_utils.mkdirs(dir_out)

    for ds_record in tqdm(df_records.itertuples(), total=nrows):
        # source image path and its uuid-named link path
        fp_src = data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext)
        fp_dst = Path(data_store.face_uuid(ds_record.uuid, ds_record.ext))
        if opt_fp_out is not None:
            # honor the -o override: keep the uuid file name, but place the
            # link inside the directory that was actually created above
            fp_dst = Path(dir_out) / fp_dst.name
        if fp_dst.is_symlink():
            # make re-runs idempotent: replace a stale link instead of
            # crashing with FileExistsError (regular files still raise)
            fp_dst.unlink()
        fp_dst.symlink_to(Path(fp_src))

    log.info('symlinked {:,} files'.format(nrows))