summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/symlink.py
blob: 70ec6c46b0ca4f632746db84d05bae0c2582ddf7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Input records CSV')
@click.option('-m', '--media', 'opt_fp_media', required=True,
  help='Input media directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
  help='Output directory')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_media, opt_fp_out):
  """Symlinks images to new directory for S3

  Reads the records CSV (columns: subdir, fn, ext, uuid) and, for every row,
  links <media>/<subdir>/<fn>.<ext> to <output>/<uuid>.<ext>. Destinations
  that already exist are left untouched, so the command can be re-run.
  """

  from os.path import abspath, join
  from pathlib import Path

  from tqdm import tqdm
  import pandas as pd

  from app.utils import logger_utils, file_utils

  # -------------------------------------------------
  # init here

  log = logger_utils.Logger.getLogger()

  df_records = pd.read_csv(opt_fp_in)
  nrows = len(df_records)

  file_utils.mkdirs(opt_fp_out)

  # A relative symlink target is resolved against the directory holding the
  # link, not the current working directory. Anchor the media root up front so
  # the links stay valid when --media is given as a relative path.
  fp_media = abspath(opt_fp_media)

  n_linked = 0
  n_existing = 0

  # iterrows() already yields each row, so use it directly instead of looking
  # the row up again by position with the index label.
  for _, row in tqdm(df_records.iterrows(), total=nrows):
    # make image paths
    fn_src = '{}.{}'.format(row['fn'], row['ext'])
    fn_dst = '{}.{}'.format(row['uuid'], row['ext'])
    fpp_src = Path(join(fp_media, row['subdir'], fn_src))
    fpp_dst = Path(join(opt_fp_out, fn_dst))

    try:
      fpp_dst.symlink_to(fpp_src)
    except FileExistsError:
      # destination is already present (e.g. from an earlier run): keep it
      n_existing += 1
      continue
    n_linked += 1

  # report what was actually created rather than the number of CSV rows
  log.info('symlinked {:,} files'.format(n_linked))
  if n_existing:
    log.info('skipped {:,} existing files'.format(n_existing))