summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/s3_sync.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2018-12-17 01:37:31 +0100
committeradamhrv <adam@ahprojects.com>2018-12-17 01:37:31 +0100
commit88ec48e1c4d93ba9cd3aa186c068ef2aa4c27c56 (patch)
tree506075c0c8f0d4bbf15e97c6db50b6e055c5bd4e /megapixels/commands/datasets/s3_sync.py
parent23e9fef5dce8b0b15dd94713816b9d7d45f12356 (diff)
fixing dataset processors
Diffstat (limited to 'megapixels/commands/datasets/s3_sync.py')
-rw-r--r--megapixels/commands/datasets/s3_sync.py57
1 files changed, 57 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/s3_sync.py b/megapixels/commands/datasets/s3_sync.py
new file mode 100644
index 00000000..3098d9be
--- /dev/null
+++ b/megapixels/commands/datasets/s3_sync.py
@@ -0,0 +1,57 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+# Sync targets selectable with -t/--type, each mapped to the S3 location
+# (from app_cfg) under which the per-dataset directory is placed.
+s3_dirs = {
+    'media': cfg.S3_MEDIA_URL,
+    'metadata': cfg.S3_METADATA_URL,
+}
+
+@click.command()
+@click.option('--data_store', 'opt_data_store',
+    type=cfg.DataStoreVar,
+    default=click_utils.get_default(types.DataStore.SSD),
+    show_default=True,
+    help=click_utils.show_help(types.DataStore))
+@click.option('--dataset', 'opt_dataset',
+    type=cfg.DatasetVar,
+    required=True,
+    show_default=True,
+    help=click_utils.show_help(types.Dataset))
+@click.option('-t', '--type', 'opt_type', type=click.Choice(list(s3_dirs)), required=True,
+    help='S3 location')
+@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False)
+@click.pass_context
+def cli(ctx, opt_data_store, opt_dataset, opt_type, opt_dryrun):
+    """Syncs files with S3/spaces server"""
+    # NOTE: the docstring above is emitted by click as the command's --help
+    # text, so it is kept unchanged; implementation notes live in comments.
+    #
+    # Parameters (all injected by click):
+    #   ctx            -- click context; used to propagate a failing exit status.
+    #   opt_data_store -- data store selector passed through to DataStore().
+    #   opt_dataset    -- dataset enum member; its lower-cased name is the
+    #                     remote sub-directory.
+    #   opt_type       -- one of the keys of s3_dirs ('media' or 'metadata').
+    #   opt_dryrun     -- when set, only log the s3cmd command line.
+
+    # Imports are deferred to call time, as in the rest of this command module.
+    from os.path import join
+    import subprocess
+
+    from app.utils import logger_utils
+    from app.models.data_store import DataStore
+
+    # -------------------------------------------------
+    # init here
+
+    log = logger_utils.Logger.getLogger()
+
+    # set data_store
+    data_store = DataStore(opt_data_store, opt_dataset)
+    dataset_name = opt_dataset.name.lower()
+
+    # join(path, '') appends a trailing separator; s3cmd sync then copies the
+    # directory's contents rather than the directory itself.
+    if opt_type == 'media':
+        dir_src = join(data_store.uuid_dir(), '')
+    elif opt_type == 'metadata':
+        dir_src = join(data_store.metadata_dir(), '')
+    else:
+        # Unreachable while click.Choice mirrors s3_dirs, but fail loudly
+        # instead of hitting an unbound local if a new key is added there.
+        raise click.BadParameter('unsupported type: {}'.format(opt_type))
+    dir_dst = join(s3_dirs[opt_type], dataset_name, '')
+
+    # -P makes uploaded objects publicly readable (s3cmd --acl-public).
+    cmd = ['s3cmd', 'sync', dir_src, dir_dst, '-P', '--follow-symlinks']
+    log.info(' '.join(cmd))
+    if opt_dryrun:
+        return
+
+    # Surface s3cmd failures instead of silently discarding the exit status.
+    returncode = subprocess.call(cmd)
+    if returncode != 0:
+        log.error('s3cmd exited with status {}'.format(returncode))
+        ctx.exit(returncode)
+
+ \ No newline at end of file