diff options
| author | adamhrv <adam@ahprojects.com> | 2018-12-16 19:37:58 +0100 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2018-12-16 19:37:58 +0100 |
| commit | c3839ea797401d740db64691c0b4922c935b131c (patch) | |
| tree | ef64b6b441dd677a41f79a423af8b7a44e68b23f /megapixels/commands/datasets/file_meta.py | |
| parent | 10f467b64e3be528ac246d5cf664d675aca3e7f3 (diff) | |
still sorting CSV vectors indexes
Diffstat (limited to 'megapixels/commands/datasets/file_meta.py')
| -rw-r--r-- | megapixels/commands/datasets/file_meta.py | 84 |
1 files changed, 0 insertions, 84 deletions
diff --git a/megapixels/commands/datasets/file_meta.py b/megapixels/commands/datasets/file_meta.py deleted file mode 100644 index e1456f44..00000000 --- a/megapixels/commands/datasets/file_meta.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Begin with this file to process folder of images -- Converts folders and subdirectories into CSV with file attributes split -""" -import click - -from app.settings import types -from app.utils import click_utils -from app.settings import app_cfg as cfg -from app.utils.logger_utils import Logger - -log = Logger.getLogger() - -@click.command() -@click.option('-i', '--input', 'opt_fp_in', required=True, - help='Input directory') -@click.option('-o', '--output', 'opt_fp_out', required=True, - help='Output file for file meta CSV') -@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), - help='Slice list of files') -@click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False, - help='Use glob recursion (slower)') -@click.option('-t', '--threads', 'opt_threads', default=4, - help='Number of threads') -@click.option('-f', '--force', 'opt_force', is_flag=True, - help='Force overwrite file') -@click.pass_context -def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_recursive, opt_threads, opt_force): - """Multithreading test""" - - from glob import glob - from os.path import join - from pathlib import Path - import time - from multiprocessing.dummy import Pool as ThreadPool - import random - - import pandas as pd - from tqdm import tqdm - from glob import glob - - from app.utils import file_utils, im_utils - - - if not opt_force and Path(opt_fp_out).exists(): - log.error('File exists. Use "-f / --force" to overwite') - return - - fp_ims = [] - log.info(f'Globbing {opt_fp_in}') - for ext in ['jpg', 'png']: - if opt_recursive: - fp_glob = join(opt_fp_in, '**/*.{}'.format(ext)) - fp_ims += glob(fp_glob, recursive=True) - else: - fp_glob = join(opt_fp_in, '*.{}'.format(ext)) - fp_ims += glob(fp_glob) - - if not fp_ims: - log.warn('No images. Try with "--recursive"') - return - - if opt_slice: - fp_ims = fp_ims[opt_slice[0]:opt_slice[1]] - - log.info('Processing {:,} images'.format(len(fp_ims))) - - - # convert data to dict - data = [] - for i, fp_im in enumerate(tqdm(fp_ims)): - fpp_im = Path(fp_im) - subdir = str(fpp_im.parent.relative_to(opt_fp_in)) - data.append( { - 'subdir': subdir, - 'fn': fpp_im.stem, - 'ext': fpp_im.suffix.replace('.','') - }) - - # save to CSV - file_utils.mkdirs(opt_fp_out) - df = pd.DataFrame.from_dict(data) - df.index.name = 'index' - df.to_csv(opt_fp_out)
\ No newline at end of file |
