summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/symlink_uuid.py
blob: 7c5faa9525fda314acc6f1461d150111c072cd3b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
  help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
  help='Override enum output directory for symlinks')
@click.option('--data_store', 'opt_data_store',
  type=cfg.DataStoreVar,
  default=click_utils.get_default(types.DataStore.SSD),
  show_default=True,
  help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
  type=cfg.DatasetVar,
  required=True,
  show_default=True,
  help=click_utils.show_help(types.Dataset))
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset):
  """Symlinks images to new directory for S3

  For every row of the file-record CSV, creates a symlink named after the
  row's uuid that points at the original face image.
  """
  # Parameters:
  #   ctx            -- click context (unused here, required by pass_context)
  #   opt_fp_in      -- path to a file-record CSV; None uses the data store's
  #                     FILE_RECORD metadata path
  #   opt_fp_out     -- directory to write symlinks into; None uses the data
  #                     store's own uuid locations
  #   opt_data_store -- data store selector passed through to DataStore
  #   opt_dataset    -- dataset selector passed through to DataStore
  #
  # The CSV must provide the columns: index, subdir, fn, ext, uuid.
  # Raises FileExistsError if a destination exists and is not already a
  # symlink to the same source.

  # Imports are kept local to the command, matching the file's existing style
  import os
  from pathlib import Path

  from tqdm import tqdm
  import pandas as pd

  from app.utils import logger_utils, file_utils
  from app.models.data_store import DataStore

  # -------------------------------------------------
  # init here

  log = logger_utils.Logger.getLogger()

  # set data_store
  data_store = DataStore(opt_data_store, opt_dataset)

  # honor the -i override instead of silently ignoring it
  if opt_fp_in is None:
    fp_records = data_store.metadata(types.Metadata.FILE_RECORD)
  else:
    fp_records = opt_fp_in
  df_records = pd.read_csv(fp_records).set_index('index')
  nrows = len(df_records)

  dir_out = data_store.uuid_dir() if opt_fp_out is None else opt_fp_out
  file_utils.mkdirs(dir_out)

  n_linked = 0
  n_skipped = 0
  for ds_record in tqdm(df_records.itertuples(), total=nrows):
    # make image paths
    fp_src = Path(data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext))
    fp_dst = Path(data_store.face_uuid(ds_record.uuid, ds_record.ext))
    if opt_fp_out is not None:
      # keep the data store's link filename but place it in the override dir,
      # so that -o actually controls where the links are written
      fp_dst = Path(dir_out) / fp_dst.name

    try:
      fp_dst.symlink_to(fp_src)
    except FileExistsError:
      # a re-run is fine when the link already points at the same source;
      # anything else at the destination is a real conflict and is re-raised
      same_link = fp_dst.is_symlink() and os.readlink(str(fp_dst)) == str(fp_src)
      if not same_link:
        raise
      n_skipped += 1
    else:
      n_linked += 1

  # report what was actually created rather than the number of CSV rows
  log.info('symlinked {:,} files'.format(n_linked))
  if n_skipped:
    log.info('skipped {:,} existing symlinks'.format(n_skipped))