summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/s3.py
blob: 7769896bae154be3290f602d0d27742e1b39cc58 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

# Maps each value accepted by the --type option to the S3 root that the
# dataset directory is placed under.
s3_dirs = {
  'media': cfg.S3_MEDIA_ROOT,
  'metadata': cfg.S3_METADATA_ROOT,
}

@click.command()
@click.option('-i', '--input', 'opt_fps_in', required=True, multiple=True,
  help='Input directory')
@click.option('--name', 'opt_dataset_name', required=True,
  help='Dataset key (eg "lfw")')
@click.option('-a', '--action', 'opt_action', type=click.Choice(['sync', 'put']), default='sync',
  help='S3 action')
@click.option('-t', '--type', 'opt_type', type=click.Choice(list(s3_dirs)), required=True,
  help='S3 location')
@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False,
  help='Log the s3cmd commands without running them')
@click.pass_context
def cli(ctx, opt_fps_in, opt_dataset_name, opt_action, opt_type, opt_dryrun):
  """Syncs files with S3/spaces server

  Runs one s3cmd command per input path, targeting
  <S3 root for --type>/<dataset name>/. Every command is logged; with
  --dry-run the commands are logged but not executed.
  """

  from os.path import join
  from pathlib import Path
  import subprocess

  from app.utils import logger_utils

  # -------------------------------------------------
  # init here

  log = logger_utils.Logger.getLogger()

  # The destination is the same for every input; the empty last component
  # makes join() append a trailing slash.
  dir_dst = join(s3_dirs[opt_type], opt_dataset_name, '')

  for opt_fp_in in opt_fps_in:
    if Path(opt_fp_in).is_dir():
      fp_src = join(opt_fp_in, '')  # add trailing slash to directories
    else:
      fp_src = opt_fp_in  # files (or non-existent paths) are passed through as-is

    # -P is s3cmd's --acl-public flag: uploaded objects become publicly readable
    cmd = ['s3cmd', opt_action, fp_src, dir_dst, '-P', '--follow-symlinks']
    log.info(' '.join(cmd))
    if opt_dryrun:
      continue

    # Argument list (no shell) so paths with spaces or metacharacters are safe
    status = subprocess.call(cmd)
    if status != 0:
      # Report the failure but keep going so the remaining inputs are attempted
      log.error('s3cmd exited with status {} for: {}'.format(status, fp_src))