'''
Rewrite the identity_key column of a dataset's file-record CSV as slugs.
'''

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()

identity_sources = ['subdir', 'numeric']


@click.command()
@click.option('--data_store', 'opt_data_store',
    type=cfg.DataStoreVar,
    default=click_utils.get_default(types.DataStore.HDD),
    show_default=True,
    # Help is built from the DataStore type so it matches this option
    # (it was previously built from types.Dataset).
    help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
    type=cfg.DatasetVar,
    required=True,
    show_default=True,
    help=click_utils.show_help(types.Dataset))
@click.pass_context
def cli(ctx, opt_dataset, opt_data_store):
    """Fix identity key to be slug"""
    # NOTE: click shows the docstring above as this command's --help text,
    # so longer documentation lives in comments instead.
    #
    # Arguments (supplied by click):
    #   ctx            -- click context from @click.pass_context (unused here)
    #   opt_dataset    -- value of --dataset, passed on to DataStore
    #   opt_data_store -- value of --data_store, passed on to DataStore
    #
    # Side effect: the FILE_RECORD metadata CSV is overwritten in place with
    # every identity_key replaced by slugify(identity_key, separator='_').

    # Heavy / project-local imports stay function-scoped, as in the original.
    import pandas as pd
    from slugify import slugify
    from tqdm import tqdm

    from app.models.data_store import DataStore

    data_store = DataStore(opt_data_store, opt_dataset)
    fp_records = data_store.metadata(types.Metadata.FILE_RECORD)

    # ----------------------------------------------------------------
    # load csv and slugify

    # set_index('index') also names the index 'index', so to_csv() below
    # writes it back under the same header.
    df_records = pd.read_csv(fp_records, dtype=cfg.FILE_RECORD_DTYPES).set_index('index')

    # Update the column in place instead of round-tripping through a list of
    # dicts: this keeps the index values read from the file and keeps the
    # column header intact even when the CSV has no rows.
    df_records['identity_key'] = [
        slugify(identity_key, separator='_')
        for identity_key in tqdm(df_records['identity_key'])
    ]

    df_records.to_csv(fp_records)
    log.info(f'wrote: {fp_records}')