summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/gen_uuid.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2018-12-16 19:37:58 +0100
committeradamhrv <adam@ahprojects.com>2018-12-16 19:37:58 +0100
commitc3839ea797401d740db64691c0b4922c935b131c (patch)
treeef64b6b441dd677a41f79a423af8b7a44e68b23f /megapixels/commands/datasets/gen_uuid.py
parent10f467b64e3be528ac246d5cf664d675aca3e7f3 (diff)
still sorting CSV vectors indexes
Diffstat (limited to 'megapixels/commands/datasets/gen_uuid.py')
-rw-r--r--megapixels/commands/datasets/gen_uuid.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/gen_uuid.py b/megapixels/commands/datasets/gen_uuid.py
new file mode 100644
index 00000000..612c43ee
--- /dev/null
+++ b/megapixels/commands/datasets/gen_uuid.py
@@ -0,0 +1,65 @@
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

# Module-level logger used by the command below.
log = Logger.getLogger()


# NOTE: the docstring of `cli` is intentionally kept to a single line because
# click uses it verbatim as the command's `--help` text.
#
# What the command does:
#   1. Resolves the output path (the data store's UUID metadata file unless
#      `--output` overrides it) and refuses to overwrite an existing file
#      unless `--force` is given.
#   2. Loads the input CSV (the data store's SHA256 metadata file unless
#      `--input` overrides it), indexed by its `index` column.
#   3. Drops the `sha256` and `identity_index` columns, adds a `uuid` column
#      holding one freshly generated UUID4 per row, and writes the result to
#      the output path as CSV.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
              help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.NAS),
              show_default=True,
              # Help is built from the DataStore enum so it matches this
              # option's default (it previously showed the Dataset choices).
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.option('-f', '--force', 'opt_force', is_flag=True,
              help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset, opt_force):
    """Appends UUID to records CSV"""

    # Imports are kept function-local as in the original
    # (presumably to keep CLI start-up / `--help` fast -- confirm).
    from pathlib import Path
    import uuid

    from tqdm import tqdm
    import pandas as pd

    from app.models import DataStore

    # Resolve dataset-specific metadata locations.
    data_store = DataStore(opt_data_store, opt_dataset)

    # Output path: explicit override wins over the data store default.
    fp_out = data_store.metadata(types.Metadata.UUID) if opt_fp_out is None else opt_fp_out
    # Never clobber an existing file unless the caller asked for it.
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwrite')
        return

    # Load the SHA256 records; the CSV's `index` column becomes the frame index.
    fp_in = data_store.metadata(types.Metadata.SHA256) if opt_fp_in is None else opt_fp_in
    log.info(f'Loading: {fp_in}')
    df_records = pd.read_csv(fp_in).set_index('index')

    # Keep the index (and any other columns) but not the hash/identity columns.
    # `drop` returns a new frame, so `df_records` is left untouched.
    df_uuids = df_records.drop(['sha256', 'identity_index'], axis=1)

    # One independent UUID4 per row, assigned positionally so that rows with
    # a repeated index label still receive distinct values.
    df_uuids['uuid'] = [str(uuid.uuid4()) for _ in tqdm(range(len(df_uuids)))]

    df_uuids.to_csv(fp_out)