import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

# Maps the --type choice to the base S3 URL that the dataset is synced under.
s3_dirs = {'media': cfg.S3_MEDIA_URL, 'metadata': cfg.S3_METADATA_URL}


@click.command()
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              # Help must describe the DataStore enum (same enum as the default),
              # not the Dataset enum.
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.option('-t', '--type', 'opt_type',
              # Pass a concrete list rather than a live dict_keys view.
              type=click.Choice(list(s3_dirs)),
              required=True,
              help='S3 location')
@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False)
@click.pass_context
def cli(ctx, opt_data_store, opt_dataset, opt_type, opt_dryrun):
    """Syncs files with S3/spaces server"""
    # NOTE: the docstring above is emitted by click as the command's --help
    # text, so it is kept unchanged.
    #
    # ctx:            click context; used to propagate a failing exit status.
    # opt_data_store: data store selector, passed to DataStore.
    # opt_dataset:    dataset selector; its lower-cased .name is the S3 sub-directory.
    # opt_type:       key of s3_dirs ('media' or 'metadata').
    # opt_dryrun:     when set, only log the s3cmd command without running it.

    # Imports are kept local to the command, as in the original file.
    from os.path import join
    import subprocess

    from app.utils import logger_utils
    from app.models.data_store import DataStore

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    # set data_store
    data_store = DataStore(opt_data_store, opt_dataset)
    dataset_name = opt_dataset.name.lower()

    # Resolve the local source directory for the requested type. A dispatch
    # table keeps the lookup total: an unexpected type raises KeyError here
    # instead of leaving dir_src unbound further down.
    src_dir_getters = {
        'media': data_store.uuid_dir,
        'metadata': data_store.metadata_dir,
    }
    # Joining with '' appends a trailing separator to both paths.
    dir_src = join(src_dir_getters[opt_type](), '')
    dir_dst = join(s3_dirs[opt_type], dataset_name, '')

    cmd = ['s3cmd', 'sync', dir_src, dir_dst, '-P', '--follow-symlinks']
    log.info(' '.join(cmd))

    if opt_dryrun:
        return

    # Argument list, no shell: paths are passed to s3cmd verbatim.
    returncode = subprocess.call(cmd)
    if returncode != 0:
        # Do not report success when the sync failed or was only partial.
        log.error('s3cmd sync failed with exit status %s', returncode)
        ctx.exit(returncode)

    # Sample s3cmd output:
    #
    # upload: '/data_store_ssd/datasets/people/vgg_face2/media/uuid/00418e0e-48e9-44f9-b6a0-b2ffd773802e.jpg'
    #   -> 's3://megapixels/v1/media/vgg_face2/00418e0e-48e9-44f9-b6a0-b2ffd773802e.jpg'
    #   [3202 of 3187313]
    #   [2953 of 3187313]