summary | refs | log | tree | commit | diff
path: root/megapixels/commands/datasets/s3.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2018-12-13 18:13:55 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-12-13 18:13:55 +0100
commit: 3ab28a3ff3d0e1b71f123e38ce3d0df42caddc7c (patch)
tree: 5ec327f62304c93583d982d20e2e92e7d0fa6f57 /megapixels/commands/datasets/s3.py
parent: d1da6ed6b0a6911c3b24e012ea051c9253ce8479 (diff)
parent: bd51b3cdf474c93b1d7c667d9e5a33159c97640a (diff)
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels/commands/datasets/s3.py')
-rw-r--r-- megapixels/commands/datasets/s3.py 47
1 files changed, 47 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/s3.py b/megapixels/commands/datasets/s3.py
new file mode 100644
index 00000000..7769896b
--- /dev/null
+++ b/megapixels/commands/datasets/s3.py
@@ -0,0 +1,47 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+# S3 roots selectable on the command line; the keys are the accepted
+# values of the --type option and index this mapping inside cli()
+s3_dirs = {
+    'media': cfg.S3_MEDIA_ROOT,
+    'metadata': cfg.S3_METADATA_ROOT,
+}
+
+@click.command()
+@click.option('-i', '--input', 'opt_fps_in', required=True, multiple=True,
+    help='Input directory')
+@click.option('--name', 'opt_dataset_name', required=True,
+    help='Dataset key (eg "lfw")')
+@click.option('-a', '--action', 'opt_action', type=click.Choice(['sync', 'put']), default='sync',
+    help='S3 action')
+# click.Choice is documented to take a list or tuple, so materialize the keys
+@click.option('-t', '--type', 'opt_type', type=click.Choice(list(s3_dirs)), required=True,
+    help='S3 location')
+@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False)
+@click.pass_context
+def cli(ctx, opt_fps_in, opt_dataset_name, opt_action, opt_type, opt_dryrun):
+    """Syncs files with S3/spaces server"""
+    # NOTE: click uses the docstring above as the command's --help text, so
+    # the longer notes are kept in comments.
+    #
+    # ctx:              click context injected by @click.pass_context (unused)
+    # opt_fps_in:       input paths, one per -i/--input occurrence
+    # opt_dataset_name: sub-directory appended to the selected S3 root
+    # opt_action:       s3cmd sub-command to run, 'sync' or 'put'
+    # opt_type:         key into s3_dirs selecting the S3 root
+    # opt_dryrun:       when set, commands are logged but not executed
+
+    from os.path import join
+    from pathlib import Path
+    import subprocess
+
+    from app.utils import logger_utils
+
+    log = logger_utils.Logger.getLogger()
+
+    # The destination does not depend on the input, so build it once.
+    # Joining with '' appends the trailing path separator.
+    dir_dst = join(s3_dirs[opt_type], opt_dataset_name, '')
+
+    for opt_fp_in in opt_fps_in:
+        # directories get a trailing slash, anything else is passed unchanged
+        fp_src = join(opt_fp_in, '') if Path(opt_fp_in).is_dir() else opt_fp_in
+        cmd = ['s3cmd', opt_action, fp_src, dir_dst, '-P', '--follow-symlinks']
+        log.info(' '.join(cmd))
+        if opt_dryrun:
+            continue
+        # argument list (no shell) so paths containing spaces stay intact
+        exit_code = subprocess.call(cmd)
+        if exit_code != 0:
+            # carry on with the remaining inputs, but do not hide the failure
+            log.error('s3cmd exited with code {} for {}'.format(exit_code, fp_src))
+
+ \ No newline at end of file