summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/gen_uuid.py
blob: 612c43ee5a8e61f8eb9881dbc9d7ecee5bf0983e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

# Module-level logger obtained from the project's Logger utility; used by cli() below
log = Logger.getLogger()

@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
  help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
  help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
  type=cfg.DataStoreVar,
  default=click_utils.get_default(types.DataStore.NAS),
  show_default=True,
  help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
  type=cfg.DatasetVar,
  required=True,
  show_default=True,
  help=click_utils.show_help(types.Dataset))
@click.option('-f', '--force', 'opt_force', is_flag=True,
  help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset, opt_force):
  """Appends UUID to records CSV

  Reads the records CSV (indexed by its 'index' column), assigns a freshly
  generated random UUID (uuid4) to every row, drops the 'sha256' and
  'identity_index' columns and writes the result to the output CSV.

  Input/output paths default to the data store's SHA256 and UUID metadata
  files for the selected dataset unless overridden with -i / -o. If the
  output file already exists, nothing is written unless -f / --force is set.
  """

  # heavy imports are kept function-local, following the file's existing style
  from pathlib import Path
  import uuid

  from tqdm import tqdm
  import pandas as pd

  from app.models import DataStore

  # set data_store
  data_store = DataStore(opt_data_store, opt_dataset)
  # get filepath out
  fp_out = data_store.metadata(types.Metadata.UUID) if opt_fp_out is None else opt_fp_out
  # exit if exists, so an existing UUID file is never clobbered by accident
  if not opt_force and Path(fp_out).exists():
    log.error('File exists. Use "-f / --force" to overwrite')
    return

  # load sha256 records
  fp_in = data_store.metadata(types.Metadata.SHA256) if opt_fp_in is None else opt_fp_in
  log.info(f'Loading: {fp_in}')
  df_records = pd.read_csv(fp_in).set_index('index')

  # keep the record index (and any other columns), minus the columns that
  # do not belong in the UUID file; drop() returns a new frame
  df_uuids = df_records.drop(['sha256', 'identity_index'], axis=1)

  # one independent UUID per row, generated in a single pass instead of
  # pre-filling the column and overwriting it cell by cell
  df_uuids['uuid'] = [uuid.uuid4() for _ in tqdm(range(len(df_uuids)))]

  df_uuids.to_csv(fp_out)