""" Begin with this file to process folder of images - Converts folders and subdirectories into CSV with file attributes split """ import click from app.settings import types from app.utils import click_utils from app.settings import app_cfg as cfg from app.utils.logger_utils import Logger log = Logger.getLogger() @click.command() @click.option('-i', '--input', 'opt_fp_in', help='Override enum input filename CSV') @click.option('-o', '--output', 'opt_fp_out', help='Override enum output filename CSV') @click.option('--data_store', 'opt_data_store', type=cfg.DataStoreVar, default=click_utils.get_default(types.DataStore.NAS), show_default=True, help=click_utils.show_help(types.Dataset)) @click.option('--dataset', 'opt_dataset', type=cfg.DatasetVar, required=True, show_default=True, help=click_utils.show_help(types.Dataset)) @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), help='Slice list of files') @click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False, help='Use glob recursion (slower)') @click.option('-t', '--threads', 'opt_threads', default=4, help='Number of threads') @click.option('-f', '--force', 'opt_force', is_flag=True, help='Force overwrite file') @click.pass_context def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_dataset, opt_slice, opt_recursive, opt_threads, opt_force): """Multithreading test""" from glob import glob from os.path import join from pathlib import Path import time from multiprocessing.dummy import Pool as ThreadPool import random import pandas as pd from tqdm import tqdm from glob import glob from app.models.data_store import DataStore from app.utils import file_utils, im_utils data_store = DataStore(opt_data_store, opt_dataset) fp_out = opt_fp_out if opt_fp_out is not None else data_store.metadata(types.Metadata.FILEPATH) if not opt_force and Path(fp_out).exists(): log.error('File exists. Use "-f / --force" to overwite') return # glob files fp_in = opt_fp_in if opt_fp_in is not None else data_store.media_images_original() fp_ims = [] log.info(f'Globbing {fp_in}') for ext in ['jpg', 'png']: if opt_recursive: fp_glob = join(fp_in, '**/*.{}'.format(ext)) fp_ims += glob(fp_glob, recursive=True) else: fp_glob = join(fp_in, '*.{}'.format(ext)) fp_ims += glob(fp_glob) if not fp_ims: log.warn('No images. Try with "--recursive"') return if opt_slice: fp_ims = fp_ims[opt_slice[0]:opt_slice[1]] log.info('Found {:,} images'.format(len(fp_ims))) # convert data to dict data = [] for i, fp_im in enumerate(tqdm(fp_ims)): fpp_im = Path(fp_im) subdir = str(fpp_im.parent.relative_to(fp_in)) data.append( { 'subdir': subdir, 'fn': fpp_im.stem, 'ext': fpp_im.suffix.replace('.','') }) # save to CSV file_utils.mkdirs(fp_out) df_filepath = pd.DataFrame.from_dict(data) df_filepath = df_filepath.sort_values(by=['subdir'], ascending=True) df_filepath = df_filepath.reset_index() df_filepath.index.name = 'index' df_filepath.to_csv(fp_out)