summary | refs | log | tree | commit | diff
path: root/megapixels/commands/datasets/symlink.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/symlink.py')
-rw-r--r-- megapixels/commands/datasets/symlink.py 45
1 files changed, 45 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/symlink.py b/megapixels/commands/datasets/symlink.py
new file mode 100644
index 00000000..70ec6c46
--- /dev/null
+++ b/megapixels/commands/datasets/symlink.py
@@ -0,0 +1,45 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+    help='Input records CSV')
+@click.option('-m', '--media', 'opt_fp_media', required=True,
+    help='Input media directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+    help='Output directory')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_media, opt_fp_out):
+    """Symlinks images to new directory for S3"""
+    # NOTE: click shows the docstring above as --help text, so details live here.
+    # For every row of the records CSV (columns subdir, fn, ext, uuid), links
+    # <opt_fp_out>/<uuid>.<ext> to <opt_fp_media>/<subdir>/<fn>.<ext>.
+    # symlink_to raises FileExistsError if a destination already exists.
+
+    from pathlib import Path
+
+    from tqdm import tqdm
+    import pandas as pd
+
+    from app.utils import logger_utils, file_utils
+
+    log = logger_utils.Logger.getLogger()
+
+    df_records = pd.read_csv(opt_fp_in)
+    nrows = len(df_records)
+    file_utils.mkdirs(opt_fp_out)
+
+    # A relative symlink target is resolved against the link's own directory,
+    # not the cwd, so the media root must be absolute or the links dangle.
+    fpp_media = Path(opt_fp_media).resolve()
+    fpp_out = Path(opt_fp_out)
+
+    for _, row in tqdm(df_records.iterrows(), total=nrows):
+        fpp_src = fpp_media / row['subdir'] / '{}.{}'.format(row['fn'], row['ext'])
+        fpp_dst = fpp_out / '{}.{}'.format(row['uuid'], row['ext'])
+        fpp_dst.symlink_to(fpp_src)
+
+    log.info('symlinked {:,} files'.format(nrows))
\ No newline at end of file