diff options
Diffstat (limited to 'megapixels/commands/datasets/gen_uuid.py')
| -rw-r--r-- | megapixels/commands/datasets/gen_uuid.py | 65 |
1 files changed, 65 insertions, 0 deletions
"""Click command that writes a UUID for every record of a dataset."""

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()


@click.command()
@click.option('-i', '--input', 'opt_fp_in', default=None,
              help='Override enum input filename CSV')
@click.option('-o', '--output', 'opt_fp_out', default=None,
              help='Override enum output filename CSV')
@click.option('--data_store', 'opt_data_store',
              type=cfg.DataStoreVar,
              default=click_utils.get_default(types.DataStore.NAS),
              show_default=True,
              help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
              type=cfg.DatasetVar,
              required=True,
              show_default=True,
              help=click_utils.show_help(types.Dataset))
@click.option('-f', '--force', 'opt_force', is_flag=True,
              help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset, opt_force):
    """Appends UUID to records CSV"""
    # NOTE: click uses the docstring above as the command's --help text, so
    # it is kept short; implementation notes live in comments instead.
    #
    # Parameters (all injected by click):
    #   ctx            -- click context (not used by this command)
    #   opt_fp_in      -- input CSV path, or None to use the data store's
    #                     SHA256 metadata file
    #   opt_fp_out     -- output CSV path, or None to use the data store's
    #                     UUID metadata file
    #   opt_data_store -- data store selector passed to DataStore
    #   opt_dataset    -- dataset selector passed to DataStore
    #   opt_force      -- when set, an existing output file is overwritten
    #
    # Returns None. Writes one CSV file to the output path.

    # Function-local imports: only loaded when this command actually runs.
    from pathlib import Path
    import uuid

    from tqdm import tqdm
    import pandas as pd

    from app.models import DataStore

    # Resolve the data store for the requested dataset.
    data_store = DataStore(opt_data_store, opt_dataset)

    # Output path: explicit override wins, otherwise the store's UUID file.
    fp_out = data_store.metadata(types.Metadata.UUID) if opt_fp_out is None else opt_fp_out

    # Refuse to clobber an existing file unless --force was given.
    if not opt_force and Path(fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwrite')
        return

    # Load the SHA256 records; the CSV must contain an 'index' column.
    fp_in = data_store.metadata(types.Metadata.SHA256) if opt_fp_in is None else opt_fp_in
    log.info(f'Loading: {fp_in}')
    df_records = pd.read_csv(fp_in).set_index('index')

    # Keep every column except the hash/identity ones. drop() returns a new
    # frame, so df_records itself is left untouched.
    df_uuids = df_records.drop(['sha256', 'identity_index'], axis=1)

    # One fresh random UUID per row, generated in a single pass. Assigning a
    # list is positional, so it does not rely on index labels being unique.
    df_uuids['uuid'] = [uuid.uuid4() for _ in tqdm(range(len(df_uuids)))]

    df_uuids.to_csv(fp_out)
\ No newline at end of file |
