1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger
# Module-level logger used by the `cli` command below for status and error output.
log = Logger.getLogger()
@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
  help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
  help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
  type=cfg.DataStoreVar,
  default=click_utils.get_default(types.DataStore.NAS),
  show_default=True,
  help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
  type=cfg.DatasetVar,
  required=True,
  show_default=True,
  help=click_utils.show_help(types.Dataset))
@click.option('-f', '--force', 'opt_force', is_flag=True,
  help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset, opt_force):
  """Appends UUID to records CSV"""
  # NOTE: the docstring above is kept to one line on purpose; click prints it
  # verbatim as the command's --help text.
  #
  # Parameters (all injected by click):
  #   ctx            -- click context (unused here, required by @click.pass_context)
  #   opt_fp_in      -- input CSV path; None means use the data store's SHA256 metadata path
  #   opt_fp_out     -- output CSV path; None means use the data store's UUID metadata path
  #   opt_data_store -- data store selector passed to DataStore
  #   opt_dataset    -- dataset selector passed to DataStore
  #   opt_force      -- when True, overwrite an existing output file
  #
  # The input CSV must contain an 'index' column (used as the frame index) and
  # the 'sha256' and 'identity_index' columns, which are dropped from the output.

  # Heavy imports are deferred to call time so that merely loading the command
  # (e.g. for --help) stays cheap.
  from pathlib import Path
  import uuid

  from tqdm import tqdm
  import pandas as pd

  from app.models import DataStore

  # set data_store
  data_store = DataStore(opt_data_store, opt_dataset)

  # get filepath out
  fp_out = data_store.metadata(types.Metadata.UUID) if opt_fp_out is None else opt_fp_out

  # exit if exists
  if not opt_force and Path(fp_out).exists():
    log.error('File exists. Use "-f / --force" to overwrite')
    return

  # load sha256 records
  fp_in = data_store.metadata(types.Metadata.SHA256) if opt_fp_in is None else opt_fp_in
  log.info(f'Loading: {fp_in}')
  df_records = pd.read_csv(fp_in).set_index('index')

  # Work on a copy so the loaded records frame is left untouched.
  df_uuids = df_records.copy()

  # Generate one fresh UUID per row and assign them positionally. Assigning by
  # position (rather than by index label) guarantees every row gets its own
  # UUID even if the 'index' column contains duplicate labels.
  df_uuids['uuid'] = [uuid.uuid4() for _ in tqdm(range(len(df_uuids)))]

  # Only the index and the uuid mapping are written out.
  df_uuids = df_uuids.drop(['sha256', 'identity_index'], axis=1)
  df_uuids.to_csv(fp_out)
|