diff options
Diffstat (limited to 'megapixels/commands/datasets')
| -rw-r--r-- | megapixels/commands/datasets/add_uuid.py | 44 | ||||
| -rw-r--r-- | megapixels/commands/datasets/feret.py | 139 | ||||
| -rw-r--r-- | megapixels/commands/datasets/s3.py | 47 | ||||
| -rw-r--r-- | megapixels/commands/datasets/symlink.py | 45 | ||||
| -rw-r--r-- | megapixels/commands/datasets/vecs_to_id.py | 50 | ||||
| -rw-r--r-- | megapixels/commands/datasets/vecs_to_uuid.py | 56 |
6 files changed, 381 insertions, 0 deletions
# megapixels/commands/datasets/add_uuid.py
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()


# CLI command: read a records CSV, attach a random UUID to every row and write
# the result to a new CSV.
#   -i / --input   path handed to pandas.read_csv
#   -o / --output  path the augmented CSV is written to (required: the command
#                  has nothing useful to do without a destination)
#   -f / --force   allow overwriting an existing output file
# NOTE: the function docstring doubles as the click --help text, so it is kept
# short; implementation notes live in comments.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output directory')
@click.option('-f', '--force', 'opt_force', is_flag=True,
    help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_force):
    """Appends UUID to records CSV"""

    from pathlib import Path
    import uuid

    import pandas as pd

    # refuse to clobber an existing output unless explicitly forced
    if not opt_force and Path(opt_fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwite')
        return

    df_records = pd.read_csv(opt_fp_in)

    # One fresh random UUID per row. Assigning the column directly (instead of
    # round-tripping through a dict of rows) keeps the header row intact even
    # when the CSV contains no data rows, and overwrites any existing 'uuid'
    # column in place.
    df_records['uuid'] = [uuid.uuid4() for _ in range(len(df_records))]

    # the positional index is not part of the records, so it is not written
    df_records.to_csv(opt_fp_out, index=False)

    log.info('done')
# megapixels/commands/datasets/feret.py
import bz2
import io

import click
from PIL import Image

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger

log = Logger.getLogger()

# FERET pose code -> head angle in degrees. Per the pose table at the bottom
# of this file, positive angles are "left" poses and negative angles "right".
pose_choices = {
    'fa': 0, 'fb': 0, 'hl': 67.5, 'hr': -67.5, 'pl': 90, 'pr': -90,
    'ql': 22.5, 'qr': -22.5, 'ra': 45, 'rb': 15, 'rc': -15, 'rd': -45, 're': -75}

# pose codes grouped by the side the head is turned to (frontal poses are in
# neither list); every negative-angle code must appear in poses_right
poses_left = ['hl', 'ql', 'pl', 'ra', 'rb']
poses_right = ['hr', 'qr', 'pr', 'rc', 'rd', 're']


# CLI command: find the bz2-compressed PPM images for the selected poses,
# decompress them and save them as PNG files in the output directory.
#   -i / --input    directory searched for '<subdir>/*_<pose>.ppm.bz2'
#   -o / --output   directory the PNG files are written to (created if needed)
#   -a / --angle    min/max absolute pose angle; with the default (0, 0) only
#                   the frontal poses (fa, fb) are selected
#   -t / --threads  size of the worker thread pool
#   --flip          mirror images whose pose code is on the given side
# NOTE: the function docstring doubles as the click --help text, so it is kept
# short; implementation notes live in comments.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output directory')
@click.option('-a', '--angle', 'opt_angle', type=(float, float), default=(0,0),
    help='Min/max face angles')
@click.option('-t', '--threads', 'opt_threads', default=8,
    help='Number of threads')
@click.option('--flip', 'opt_flip', type=click.Choice(['r', 'l']),
    help='Flip profile images to the R or L')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_angle, opt_threads, opt_flip):
    """Extracts FERET images"""

    from glob import glob
    from os.path import join
    from pathlib import Path
    from tqdm import tqdm
    from multiprocessing.dummy import Pool as ThreadPool
    from functools import partial

    from PIL import ImageOps
    from app.utils import file_utils

    # keep only the poses whose absolute angle lies inside [min, max]
    angle_min, angle_max = opt_angle
    poses = [k for k, v in pose_choices.items() if angle_min <= abs(v) <= angle_max]

    # glob images dir for all *ppm.bz2
    fp_ims = []
    for pose in poses:
        log.info('globbing pose: {}'.format(pose))
        # format only the pattern, not the joined path, so an input directory
        # containing '{' or '}' cannot break str.format
        fp_ims += glob(join(opt_fp_in, '**/*_{}.ppm.bz2'.format(pose)))
    log.info('Processing: {:,} files'.format(len(fp_ims)))

    # convert bz2 to png
    def pool_func(fp_im, opt_fp_out, opt_flip):
        """Converts one image; returns True on success, False on any error."""
        try:
            # advance the bar up front so failed files are counted as processed
            pbar.update(1)
            im_pil = bz2_to_pil(fp_im)
            fpp_im = Path(fp_im)
            # the stem of 'x.ppm.bz2' is 'x.ppm'; drop the '.ppm' from the file
            # name only, leaving the output directory path untouched
            fn_out = '{}.png'.format(fpp_im.stem).replace('.ppm', '')
            fp_out = join(opt_fp_out, fn_out)
            if opt_flip:
                # pose code = first two characters of the last '_' token
                pose_code = fpp_im.stem.split('_')[-1][:2]
                # NOTE(review): '--flip r' mirrors the right-side poses and
                # '--flip l' the left-side ones; confirm this matches the
                # intended meaning of "flip to the R or L".
                if opt_flip == 'r' and pose_code in poses_right \
                    or opt_flip == 'l' and pose_code in poses_left:
                    im_pil = ImageOps.mirror(im_pil)
            im_pil.save(fp_out)
            return True
        except Exception as e:
            # best effort: one unreadable file must not abort the whole batch
            log.error('Error processing: {}, error: {}'.format(fp_im, e))
            return False

    # make output directory
    file_utils.mkdirs(opt_fp_out)

    # setup multithreading; both the pool and the single progress bar are
    # released when the block exits
    pool_convert = partial(pool_func, opt_fp_out=opt_fp_out, opt_flip=opt_flip)
    with ThreadPool(opt_threads) as pool, tqdm(total=len(fp_ims)) as pbar:
        results = pool.map(pool_convert, fp_ims)

    # results
    log.info('Converted: {} / {} images'.format(results.count(True), len(fp_ims)))


# ------------------------------------------------------------------
# local utils

def bz2_to_pil(fp_src):
    """Reads a bz2-compressed image file and opens it as a PIL image.

    :param fp_src: path to the compressed image file
    :returns: the object returned by PIL ``Image.open`` on the decompressed bytes
    """
    with open(fp_src, 'rb') as fp:
        im_raw = bz2.decompress(fp.read())
    im_pil = Image.open(io.BytesIO(im_raw))
    return im_pil



"""

A breakdown of the images by pose is:
    Pose    Angle    Images    Subjects
    fa        0       1364       994
    fb        0       1358       993
    hl      +67.5     1267       917
    hr      -67.5     1320       953
    pl      +90       1312       960
    pr      -90       1363       994
    ql      +22.5      761       501
    qr      -22.5      761       501
    ra      +45        321       261
    rb      +15        321       261
    rc      -15        610       423
    rd      -45        290       236
    re      -75        290       236

    There are 13 different poses. (The orientation "right" means
facing the photographer's right.)

    fa  regular frontal image
    fb  alternative frontal image, taken shortly after the
        corresponding fa image
    pl  profile left
    hl  half left - head turned about 67.5 degrees left
    ql  quarter left - head turned about 22.5 degrees left
    pr  profile right
    hr  half right - head turned about 67.5 degrees right
    qr  quarter right - head turned about 22.5 degrees right
    ra  random image - head turned about 45 degree left
    rb  random image - head turned about 15 degree left
    rc  random image - head turned about 15 degree right
    rd  random image - head turned about 45 degree right
    re  random image - head turned about 75 degree right

"""
# megapixels/commands/datasets/s3.py
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

# destination roots, keyed by the values accepted for -t / --type
s3_dirs = {'media': cfg.S3_MEDIA_ROOT, 'metadata': cfg.S3_METADATA_ROOT}


# CLI command: run `s3cmd <action>` once per input path, targeting
# <root for --type>/<--name>/ as the destination.
#   -i / --input   source path; may be given several times
#   --name         dataset key appended to the destination root
#   -a / --action  s3cmd sub-command, 'sync' (default) or 'put'
#   -t / --type    which entry of s3_dirs to use as the destination root
#   --dry-run      only log the commands, do not execute them
# NOTE: the function docstring doubles as the click --help text, so it is kept
# short; implementation notes live in comments.
@click.command()
@click.option('-i', '--input', 'opt_fps_in', required=True, multiple=True,
    help='Input directory')
@click.option('--name', 'opt_dataset_name', required=True,
    help='Dataset key (eg "lfw"')
@click.option('-a', '--action', 'opt_action', type=click.Choice(['sync', 'put']), default='sync',
    help='S3 action')
@click.option('-t', '--type', 'opt_type', type=click.Choice(list(s3_dirs)), required=True,
    help='S3 location')
@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False)
@click.pass_context
def cli(ctx, opt_fps_in, opt_dataset_name, opt_action, opt_type, opt_dryrun):
    """Syncs files with S3/spaces server"""

    from os.path import join
    from pathlib import Path
    import subprocess

    from app.utils import logger_utils

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    # the destination is the same for every input; joining with '' appends the
    # trailing slash
    dir_dst = join(s3_dirs[opt_type], opt_dataset_name, '')

    for opt_fp_in in opt_fps_in:
        # directories get a trailing slash, single files are passed through
        fp_src = join(opt_fp_in, '') if Path(opt_fp_in).is_dir() else opt_fp_in
        cmd = ['s3cmd', opt_action, fp_src, dir_dst, '-P', '--follow-symlinks']
        log.info(' '.join(cmd))
        if opt_dryrun:
            continue
        # argument list with no shell involved; surface failures instead of
        # silently dropping the exit status
        returncode = subprocess.call(cmd)
        if returncode != 0:
            log.error('s3cmd exited with status {}: {}'.format(returncode, fp_src))
# megapixels/commands/datasets/symlink.py
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg


# CLI command: for every row of the records CSV create a symlink
#   <output>/<uuid>.<ext>  ->  <media>/<subdir>/<fn>.<ext>
# The CSV is read with pandas and must provide the columns 'subdir', 'fn',
# 'ext' and 'uuid'.
#   -i / --input   records CSV
#   -m / --media   directory containing the source media
#   -o / --output  directory the symlinks are created in (created if needed)
# NOTE: the function docstring doubles as the click --help text, so it is kept
# short; implementation notes live in comments.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input records CSV')
@click.option('-m', '--media', 'opt_fp_media', required=True,
    help='Input media directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output directory')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_media, opt_fp_out):
    """Symlinks images to new directory for S3"""

    from os.path import join
    from pathlib import Path

    from tqdm import tqdm
    import pandas as pd

    from app.utils import logger_utils, file_utils

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    df_records = pd.read_csv(opt_fp_in)
    nrows = len(df_records)

    file_utils.mkdirs(opt_fp_out)

    n_linked = 0
    # iterrows already yields each row, so no positional re-lookup is needed
    for _, row in tqdm(df_records.iterrows(), total=nrows):
        # make image path
        fn_src = '{}.{}'.format(row['fn'], row['ext'])
        fn_dst = '{}.{}'.format(row['uuid'], row['ext'])
        fpp_src = Path(join(opt_fp_media, row['subdir'], fn_src))
        fpp_dst = Path(join(opt_fp_out, fn_dst))
        try:
            fpp_dst.symlink_to(fpp_src)
        except FileExistsError:
            # e.g. a re-run over a partially populated output directory:
            # keep going instead of aborting on the first existing link
            log.error('Destination exists, skipped: {}'.format(fpp_dst))
            continue
        n_linked += 1

    # report the links actually created, not the number of CSV rows
    log.info('symlinked {:,} files'.format(n_linked))
# megapixels/commands/datasets/vecs_to_id.py
import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg


# CLI command: read the vectors CSV (columns 'id' and 'vec', where 'vec' is a
# comma-separated string) and write a JSON mapping of integer id -> list of
# the vector's components as strings.
#   -i / --input    vectors CSV
#   -r / --records  records CSV (read, but not consulted by this command)
#   -o / --output   JSON file to write
#   -f / --force    accepted, but currently not used by this command
# NOTE: the function docstring doubles as the click --help text, so it is kept
# short; implementation notes live in comments.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input directory')
@click.option('-r', '--records', 'opt_fp_records', required=True,
    help='Input directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output JSON')
@click.option('-f', '--force', 'opt_force', is_flag=True,
    help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out,opt_force):
    """Merges ID with face vectors"""

    import os
    import sys
    from os.path import join
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.utils import file_utils, logger_utils

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    vecs_frame = pd.read_csv(opt_fp_in)
    # loaded for parity with the sibling vecs_to_uuid command; nothing below
    # reads it
    records_frame = pd.read_csv(opt_fp_records)
    n_vecs = len(vecs_frame)

    # face vecs: a later row with the same id replaces an earlier one
    vecs_by_id = {
        int(entry['id']): entry['vec'].split(',')
        for _, entry in tqdm(vecs_frame.iterrows(), total=n_vecs)
    }

    # save as JSON
    file_utils.write_json(vecs_by_id, opt_fp_out, verbose=True)
# megapixels/commands/datasets/vecs_to_uuid.py
"""
Merge record UUIDs with face vectors and save the mapping as JSON
"""

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg


# CLI command: read the vectors CSV (columns 'id' and 'vec', where 'vec' is a
# comma-separated string), look up the UUID of each vector's record and write
# a JSON mapping of uuid -> list of the vector's components as strings.
#   -i / --input    vectors CSV
#   -r / --records  records CSV; must provide a 'uuid' column, and each 'id'
#                   in the vectors CSV is used as a row position into it
#   -o / --output   JSON file to write
#   -f / --force    accepted, but currently not used by this command
# NOTE: the function docstring doubles as the click --help text, so it is kept
# short; implementation notes live in comments.
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input directory')
@click.option('-r', '--records', 'opt_fp_records', required=True,
    help='Input directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output JSON')
@click.option('-f', '--force', 'opt_force', is_flag=True,
    help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out,opt_force):
    """Merges UUID with face vectors"""

    from tqdm import tqdm
    import pandas as pd

    from app.utils import logger_utils, file_utils

    # -------------------------------------------------
    # init here

    log = logger_utils.Logger.getLogger()

    df_vecs = pd.read_csv(opt_fp_in)
    df_records = pd.read_csv(opt_fp_records)
    nrows = len(df_vecs)

    # pull the column out once; indexing it by position avoids building a full
    # row Series for every vector just to read one field
    record_uuids = df_records['uuid']

    # face vecs: a later row resolving to the same uuid replaces an earlier one
    uuid_vecs = {}

    for _, row in tqdm(df_vecs.iterrows(), total=nrows):
        record_id = int(row['id'])
        # positional lookup: raises IndexError if the id is past the last record
        record_uuid = record_uuids.iloc[record_id]
        uuid_vecs[record_uuid] = row['vec'].split(',')

    # save as JSON
    file_utils.write_json(uuid_vecs, opt_fp_out)
\ No newline at end of file |
