1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
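'''
Click command that rewrites the `identity_key` column of a dataset's
file-record metadata CSV so that every key is an underscore-separated slug.
'''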
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger
# Module-level logger obtained from the project's Logger helper.
log = Logger.getLogger()
# NOTE(review): not referenced anywhere in the visible part of this file.
# Presumably the supported ways of deriving an identity (from the
# sub-directory name or from a numeric id) — TODO confirm or remove.
identity_sources = ['subdir', 'numeric']
@click.command()
@click.option('--data_store', 'opt_data_store',
    type=cfg.DataStoreVar,
    default=click_utils.get_default(types.DataStore.HDD),
    show_default=True,
    # Help must describe the data-store choices, not the dataset choices.
    help=click_utils.show_help(types.DataStore))
@click.option('--dataset', 'opt_dataset',
    type=cfg.DatasetVar,
    required=True,
    show_default=True,
    help=click_utils.show_help(types.Dataset))
@click.pass_context
def cli(ctx, opt_dataset, opt_data_store):
    """Fix identity key to be slug

    Reads the FILE_RECORD metadata CSV of the dataset selected with
    --dataset on the storage selected with --data_store, replaces every
    value in the identity_key column with its underscore-separated slug,
    and writes the CSV back to the same path.
    """
    # Heavy third-party imports stay local so that merely registering the
    # command does not pay for them.
    import pandas as pd
    from slugify import slugify
    from tqdm import tqdm

    from app.models.data_store import DataStore

    data_store = DataStore(opt_data_store, opt_dataset)
    fp_records = data_store.metadata(types.Metadata.FILE_RECORD)

    # ----------------------------------------------------------------
    # load csv and slugify
    df_records = pd.read_csv(
        fp_records, dtype=cfg.FILE_RECORD_DTYPES).set_index('index')

    # Update the column in place instead of round-tripping the frame through
    # a list of dicts: that round trip dropped the 'index' values (rows were
    # renumbered 0..n-1) and lost every column when the file had no rows.
    df_records['identity_key'] = [
        slugify(identity_key, separator='_')
        for identity_key in tqdm(df_records['identity_key'])
    ]

    # The index is still named 'index', so the CSV header is unchanged.
    df_records.to_csv(fp_records)
    log.info(f'wrote: {fp_records}')
|