""" Crop images to prepare for training """ import click import cv2 as cv from PIL import Image, ImageOps, ImageFilter from app.settings import types from app.utils import click_utils from app.settings import app_cfg as cfg cv_resize_algos = { 'area': cv.INTER_AREA, 'lanco': cv.INTER_LANCZOS4, 'linear': cv.INTER_LINEAR, 'linear_exact': cv.INTER_LINEAR_EXACT, 'nearest': cv.INTER_NEAREST } """ Filter Q-Down Q-Up Speed NEAREST ⭐⭐⭐⭐⭐ BOX ⭐ ⭐⭐⭐⭐ BILINEAR ⭐ ⭐ ⭐⭐⭐ HAMMING ⭐⭐ ⭐⭐⭐ BICUBIC ⭐⭐⭐ ⭐⭐⭐ ⭐⭐ LANCZOS ⭐⭐⭐⭐ ⭐⭐⭐⭐ ⭐ """ pil_resize_algos = { 'antialias': Image.ANTIALIAS, 'lanczos': Image.LANCZOS, 'bicubic': Image.BICUBIC, 'hamming': Image.HAMMING, 'bileaner': Image.BILINEAR, 'box': Image.BOX, 'nearest': Image.NEAREST } @click.command() @click.option('--dataset', 'opt_dataset', type=cfg.DatasetVar, required=True, show_default=True, help=click_utils.show_help(types.Dataset)) @click.option('--store', 'opt_data_store', type=cfg.DataStoreVar, default=click_utils.get_default(types.DataStore.HDD), show_default=True, help=click_utils.show_help(types.Dataset)) @click.option('-o', '--output', 'opt_dir_out', required=True, help='Output directory') @click.option('-e', '--ext', 'opt_glob_ext', default='png', type=click.Choice(['jpg', 'png']), help='File glob ext') @click.option('--size', 'opt_size', type=(int, int), default=(256, 256), help='Output image size max (w,h)') @click.option('--interp', 'opt_interp_algo', type=click.Choice(pil_resize_algos.keys()), default='bicubic', help='Interpolation resizing algorithms') @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), help='Slice the input list') @click.option('-t', '--threads', 'opt_threads', default=8, help='Number of threads') @click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False, help='Use glob recursion (slower)') @click.pass_context def cli(ctx, opt_dataset, opt_data_store, opt_dir_out, opt_glob_ext, opt_size, opt_interp_algo, opt_slice, opt_threads, opt_recursive): """Resize dataset images""" import os from os.path import join from pathlib import Path from glob import glob from tqdm import tqdm from multiprocessing.dummy import Pool as ThreadPool from functools import partial import pandas as pd import numpy as np from app.utils import logger_utils, file_utils, im_utils from app.models.data_store import DataStore # ------------------------------------------------- # init log = logger_utils.Logger.getLogger() # ------------------------------------------------- # process here def pool_resize(fp_in, dir_in, dir_out, im_size, interp_algo): # Threaded image resize function pbar.update(1) try: im = Image.open(fp_in).convert('RGB') im.verify() # throws error if image is corrupt im.thumbnail(im_size, interp_algo) fp_out = fp_in.replace(dir_in, dir_out) file_utils.mkdirs(fp_out) im.save(fp_out, quality=100) except Exception as e: log.warn(f'Could not open: {fp_in}, Error: {e}') return False return True data_store = DataStore(opt_data_store, opt_dataset) fp_records = data_store.metadata(types.Metadata.FILE_RECORD) df_records = pd.read_csv(fp_records, dtype=cfg.FILE_RECORD_DTYPES).set_index('index') dir_in = data_store.media_images_original() # get list of files to process #fp_ims = file_utils.glob_multi(opt_dir_in, ['jpg', 'png'], recursive=opt_recursive) fp_ims = [] for ds_record in df_records.itertuples(): fp_im = data_store.face(ds_record.subdir, ds_record.fn, ds_record.ext) fp_ims.append(fp_im) if opt_slice: fp_ims = fp_ims[opt_slice[0]:opt_slice[1]] if not fp_ims: log.error('No images. 
Try with "--recursive"') return log.info(f'processing {len(fp_ims):,} images') # algorithm to use for resizing interp_algo = pil_resize_algos[opt_interp_algo] log.info(f'using {interp_algo} for interpoloation') # ensure output dir exists file_utils.mkdirs(opt_dir_out) # setup multithreading pbar = tqdm(total=len(fp_ims)) # fixed arguments for pool function map_pool_resize = partial(pool_resize, dir_in=dir_in, dir_out=opt_dir_out, im_size=opt_size, interp_algo=interp_algo) #result_list = pool.map(prod_x, data_list) # simple pool = ThreadPool(opt_threads) # start multithreading with tqdm(total=len(fp_ims)) as pbar: results = pool.map(map_pool_resize, fp_ims) # end multithreading pbar.close() log.info(f'Resized: {results.count(True)} / {len(fp_ims)} images')
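
# Note on Image.thumbnail(): it resizes in place, preserves aspect ratio, and
# only ever shrinks, so images already smaller than --size keep their original
# dimensions. A minimal sketch of the difference vs. Image.resize(), assuming
# a hypothetical 1024x768 input file 'example.jpg':
#
#   im = Image.open('example.jpg')                       # 1024x768
#   im.thumbnail((256, 256), Image.BICUBIC)              # -> 256x192, aspect ratio kept
#   im2 = Image.open('example.jpg').resize((256, 256))   # -> 256x256, distorted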
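
# Example invocation (hypothetical entry point and option values; the actual
# command name depends on how `cli` is registered, and valid --dataset/--store
# choices come from the project's types.Dataset and types.DataStore enums):
#
#   python resize.py --dataset example_dataset --store hdd \
#     -o /data/resized --size 256 256 --interp lanczos --slice 0 1000 -t 8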