1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
              help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset):
    """Symlinks images to new directory for S3"""
    # NOTE: the docstring above is what Click prints for `--help`, so the
    # longer description lives in comments instead.
    #
    # Reads the dataset's file-record CSV and, for every row, creates a
    # symlink at `data_store.face_uuid(uuid, ext)` pointing at
    # `data_store.face(subdir, fn, ext)`.
    #
    # Parameters (all supplied by Click):
    #   ctx            -- Click context; not used by this command.
    #   opt_fp_in      -- optional path to a records CSV; when None the path
    #                     comes from `data_store.metadata(FILE_RECORD)`.
    #   opt_fp_out     -- optional directory to create instead of
    #                     `data_store.uuid_dir()` (see review note below).
    #   opt_data_store -- data store selector passed to `DataStore`.
    #   opt_dataset    -- dataset selector passed to `DataStore`.
    #
    # The CSV must provide the columns `index`, `subdir`, `fn`, `ext` and
    # `uuid`, since those are the fields read below.
    from pathlib import Path
    from tqdm import tqdm
    import pandas as pd
    from app.utils import logger_utils, file_utils
    from app.models.data_store import DataStore

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    # set data_store
    data_store = DataStore(opt_data_store, opt_dataset)

    # Honour the `-i/--input` override; fall back to the data store's own
    # file-record metadata path when it is not given.
    if opt_fp_in is None:
        fp_records = data_store.metadata(types.Metadata.FILE_RECORD)
    else:
        fp_records = opt_fp_in
    df_records = pd.read_csv(fp_records).set_index('index')
    nrows = len(df_records)

    # NOTE(review): `-o/--output` only changes which directory is created
    # here; the link destinations below still come from
    # `data_store.face_uuid(...)`. Confirm whether links should be placed
    # under `dir_out` when the override is used.
    dir_out = data_store.uuid_dir() if opt_fp_out is None else opt_fp_out
    file_utils.mkdirs(dir_out)

    for ds_record in tqdm(df_records.itertuples(), total=nrows):
        # make image path
        fp_src = data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext)
        fp_dst = data_store.face_uuid(ds_record.uuid, ds_record.ext)
        # `symlink_to` raises FileExistsError if `fp_dst` already exists,
        # so re-running over an already-populated directory aborts here.
        Path(fp_dst).symlink_to(Path(fp_src))

    log.info('symlinked {:,} files'.format(nrows))
|