summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/records.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/records.py')
-rw-r--r--megapixels/commands/datasets/records.py40
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/megapixels/commands/datasets/records.py b/megapixels/commands/datasets/records.py
index 80de5040..b6ef618b 100644
--- a/megapixels/commands/datasets/records.py
+++ b/megapixels/commands/datasets/records.py
@@ -107,10 +107,12 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_dataset, opt_data_store, opt_dir_media,
# convert data to dict
data = []
+ indentity_count = 0
for sha256, fp_im in zip(sha256s, fp_ims):
fpp_im = Path(fp_im)
subdir = str(fpp_im.parent.relative_to(fp_in))
+
if opt_identity:
subdirs = subdir.split('/')
if not len(subdirs) > 0:
@@ -124,7 +126,8 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_dataset, opt_data_store, opt_dir_media,
elif opt_identity == 'subdir_tail':
identity = subdirs[-1] # use last part of subdir path
else:
- identity = ''
+ identity = indentity_count # use incrementing number
+ indentity_count += 1
data.append({
'subdir': subdir,
@@ -135,22 +138,27 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_dataset, opt_data_store, opt_dir_media,
'identity_key': identity
})
- log.info(f'adding identity index using: "{opt_identity}". This may take a while...')
- # convert dict to DataFrame
df_records = pd.DataFrame.from_dict(data)
- # sort based on identity_key
- df_records = df_records.sort_values(by=['identity_key'], ascending=True)
- # add new column for identity
- df_records['identity_index'] = [-1] * len(df_records)
- # populate the identity_index
- df_records_identity_groups = df_records.groupby('identity_key')
- # enumerate groups to create identity indices
- for identity_index, df_records_identity_group_tuple in enumerate(df_records_identity_groups):
- identity_key, df_records_identity_group = df_records_identity_group_tuple
- for ds_record in df_records_identity_group.itertuples():
- df_records.at[ds_record.Index, 'identity_index'] = identity_index
- # reset index after being sorted
- df_records = df_records.reset_index(drop=True)
+ if opt_identity:
+ log.info(f'adding identity index using: "{opt_identity}". This may take a while...')
+ # convert dict to DataFrame
+ # sort based on identity_key
+ df_records = df_records.sort_values(by=['identity_key'], ascending=True)
+ # add new column for identity
+ df_records['identity_index'] = [-1] * len(df_records)
+ # populate the identity_index
+ df_records_identity_groups = df_records.groupby('identity_key')
+ # enumerate groups to create identity indices
+ for identity_index, df_records_identity_group_tuple in enumerate(df_records_identity_groups):
+ identity_key, df_records_identity_group = df_records_identity_group_tuple
+ for ds_record in df_records_identity_group.itertuples():
+ df_records.at[ds_record.Index, 'identity_index'] = identity_index
+ # reset index after being sorted
+ df_records = df_records.reset_index(drop=True)
+ else:
+ # name everyone person 1, 2, 3...
+ pass
+
df_records.index.name = 'index' # reassign 'index' as primary key column
# write to CSV
file_utils.mkdirs(fp_out)