1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
# Maps each value accepted by the --type option to the S3 base URL that
# files of that kind are synced to.
s3_dirs = dict(
    media=cfg.S3_MEDIA_URL,
    metadata=cfg.S3_METADATA_URL,
)
@click.command()
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.SSD),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.option('-t', '--type', 'opt_type',
              type=click.Choice(list(s3_dirs)),
              required=True,
              help='S3 location')
@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False,
              help='Log the s3cmd command without running it')
@click.pass_context
def cli(ctx, opt_data_store, opt_dataset, opt_type, opt_dryrun):
    """Syncs files with S3/spaces server

    Builds an ``s3cmd sync`` command for the selected dataset, logs it, and
    runs it unless --dry-run is given.
    \f

    Args:
        ctx: click context (injected by ``click.pass_context``; unused here).
        opt_data_store: data store selector, passed to ``DataStore``.
        opt_dataset: dataset selector; its ``name`` (lower-cased) is used as
            the destination sub-directory.
        opt_type: one of the keys of ``s3_dirs`` ('media' or 'metadata').
        opt_dryrun: when true, only log the command and do not execute it.

    Raises:
        click.BadParameter: if ``opt_type`` is a key of ``s3_dirs`` that has
            no source directory defined in this command.
    """
    # Imports are kept function-local so that loading the command module
    # stays cheap; only what is actually used is imported.
    from os.path import join
    import subprocess

    from app.utils import logger_utils
    from app.models.data_store import DataStore

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    # set data_store
    data_store = DataStore(opt_data_store, opt_dataset)
    dataset_name = opt_dataset.name.lower()

    # join(path, '') guarantees a trailing separator on the path.
    if opt_type == 'media':
        dir_src = join(data_store.uuid_dir(), '')
    elif opt_type == 'metadata':
        dir_src = join(data_store.metadata_dir(), '')
    else:
        # Reachable only if s3_dirs gains a key without a matching branch
        # above; fail clearly instead of hitting an unbound local below.
        raise click.BadParameter(
            'no source directory defined for type: {}'.format(opt_type),
            param_hint='--type')
    # The destination layout is the same for every type: <base>/<dataset>/
    dir_dst = join(s3_dirs[opt_type], dataset_name, '')

    # NOTE(review): per s3cmd usage, -P should be --acl-public (objects
    # readable by anyone) - confirm this is intended for every dataset.
    cmd = ['s3cmd', 'sync', dir_src, dir_dst, '-P', '--follow-symlinks']
    log.info(' '.join(cmd))

    if opt_dryrun:
        return

    returncode = subprocess.call(cmd)
    if returncode != 0:
        # Surface a failed sync instead of silently discarding the status.
        log.error('s3cmd sync exited with status %s', returncode)

    # Sample s3cmd output:
    # upload: '/data_store_ssd/datasets/people/vgg_face2/media/uuid/00418e0e-48e9-44f9-b6a0-b2ffd773802e.jpg' -> 's3://megapixels/v1/media/vgg_face2/00418e0e-48e9-44f9-b6a0-b2ffd773802e.jpg' [3202 of 3187313]
    # [2953 of 3187313]
|