summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets')
-rw-r--r--megapixels/commands/datasets/add_uuid.py44
-rw-r--r--megapixels/commands/datasets/feret.py139
-rw-r--r--megapixels/commands/datasets/s3.py47
-rw-r--r--megapixels/commands/datasets/symlink.py45
-rw-r--r--megapixels/commands/datasets/vecs_to_id.py50
-rw-r--r--megapixels/commands/datasets/vecs_to_uuid.py56
6 files changed, 381 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/add_uuid.py b/megapixels/commands/datasets/add_uuid.py
new file mode 100644
index 00000000..9c14c0e3
--- /dev/null
+++ b/megapixels/commands/datasets/add_uuid.py
@@ -0,0 +1,44 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils.logger_utils import Logger
+
+log = Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+              help='Input records CSV')
+@click.option('-o', '--output', 'opt_fp_out',
+              help='Output records CSV')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+              help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_force):
+    """Appends UUID to records CSV"""
+
+    # click prints the docstring above as the command help, so the details
+    # are kept in comments:
+    #   ctx         click context (not used)
+    #   opt_fp_in   path of the records CSV to read
+    #   opt_fp_out  path of the CSV to write (same rows plus a "uuid" column)
+    #   opt_force   allow replacing an existing output file
+
+    import uuid
+    from pathlib import Path
+
+    import pandas as pd
+
+    # -o is optional on the command line, but without it there is nowhere to
+    # write the result and Path(None) raises TypeError, so stop early
+    if not opt_fp_out:
+        log.error('Output file required. Use "-o / --output"')
+        return
+
+    if not opt_force and Path(opt_fp_out).exists():
+        log.error('File exists. Use "-f / --force" to overwrite')
+        return
+
+    # load records
+    df_records = pd.read_csv(opt_fp_in)
+    # one random UUID per row; an existing "uuid" column is replaced
+    df_records['uuid'] = [uuid.uuid4() for _ in range(len(df_records))]
+    # save to csv, ignoring the indices
+    df_records.to_csv(opt_fp_out, index=False)
+
+    log.info('done')
\ No newline at end of file
diff --git a/megapixels/commands/datasets/feret.py b/megapixels/commands/datasets/feret.py
new file mode 100644
index 00000000..906b4e37
--- /dev/null
+++ b/megapixels/commands/datasets/feret.py
@@ -0,0 +1,139 @@
+import bz2
+import io
+
+import click
+from PIL import Image
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils.logger_utils import Logger
+
+log = Logger.getLogger()
+
+# FERET pose code -> head angle in degrees (positive: turned left, negative: turned right)
+pose_choices = {
+    'fa': 0, 'fb': 0, 'hl': 67.5, 'hr': -67.5, 'pl': 90, 'pr': -90,
+    'ql': 22.5, 'qr': -22.5, 'ra': 45, 'rb': 15, 'rc': -15, 'rd': -45, 're': -75}
+
+# pose codes grouped by the side the head is turned to (frontal fa/fb are in neither)
+poses_left = ['hl', 'ql', 'pl', 'ra', 'rb']
+poses_right = ['hr', 'qr', 'pr', 'rc', 'rd', 're']
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+              help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+              help='Output directory')
+@click.option('-a', '--angle', 'opt_angle', type=(float, float), default=(0, 0),
+              help='Min/max face angles')
+@click.option('-t', '--threads', 'opt_threads', default=8,
+              help='Number of threads')
+@click.option('--flip', 'opt_flip', type=click.Choice(['r', 'l']),
+              help='Flip profile images to the R or L')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_angle, opt_threads, opt_flip):
+    """Extracts FERET images"""
+
+    # click prints the docstring above as the command help, so the details
+    # are kept in comments:
+    #   ctx          click context (not used)
+    #   opt_fp_in    directory searched for *_<pose>.ppm.bz2 files
+    #   opt_fp_out   directory the converted .png files are written to
+    #   opt_angle    (min, max) bounds on the absolute pose angle
+    #   opt_threads  size of the thread pool
+    #   opt_flip     'r', 'l' or None; selects which poses get mirrored
+
+    from glob import glob
+    from os.path import join
+    from pathlib import Path
+    from multiprocessing.dummy import Pool as ThreadPool
+
+    from tqdm import tqdm
+    from PIL import ImageOps
+    from app.utils import file_utils
+
+    # filter poses by absolute angle
+    angle_min, angle_max = opt_angle
+    poses = [k for k, v in pose_choices.items() if angle_min <= abs(v) <= angle_max]
+
+    # glob images dir for all *ppm.bz2
+    # NB: glob() is called without recursive=True, so '**' behaves like '*'
+    # and only files exactly one directory below the input are matched
+    fp_ims = []
+    for pose in poses:
+        log.info('globbing pose: {}'.format(pose))
+        # format only the pattern, so braces in the input path cannot break str.format
+        fp_ims += glob(join(opt_fp_in, '**/*_{}.ppm.bz2'.format(pose)))
+    log.info('Processing: {:,} files'.format(len(fp_ims)))
+
+    # convert one bz2 file to png; returns True on success, False on any error
+    def pool_func(fp_im):
+        try:
+            im_pil = bz2_to_pil(fp_im)
+            # 'name_fa.ppm.bz2' has the stem 'name_fa.ppm'
+            stem = Path(fp_im).stem
+            # strip '.ppm' from the file name only, never from the output directory
+            fp_out = join(opt_fp_out, '{}.png'.format(stem.replace('.ppm', '')))
+            if opt_flip:
+                pose_code = stem.split('_')[-1][:2]
+                mirror_right = opt_flip == 'r' and pose_code in poses_right
+                mirror_left = opt_flip == 'l' and pose_code in poses_left
+                if mirror_right or mirror_left:
+                    im_pil = ImageOps.mirror(im_pil)
+            im_pil.save(fp_out)
+            return True
+        except Exception as e:
+            # best effort: one unreadable file must not abort the whole batch
+            log.error('Error processing: {}, error: {}'.format(fp_im, e))
+            return False
+        finally:
+            # tick once per finished file, whether it succeeded or not
+            pbar.update(1)
+
+    # make output directory
+    file_utils.mkdirs(opt_fp_out)
+
+    # setup multithreading: a single progress bar, and both the bar and the
+    # pool are released when the block exits
+    with tqdm(total=len(fp_ims)) as pbar, ThreadPool(opt_threads) as pool:
+        results = pool.map(pool_func, fp_ims)
+
+    # results
+    log.info('Converted: {} / {} images'.format(results.count(True), len(fp_ims)))
+
+
+# ------------------------------------------------------------------
+# local utils
+
+def bz2_to_pil(fp_src):
+    """Opens a bz2-compressed image file as a PIL image
+    :param fp_src: path of the bz2-compressed image file
+    :returns: PIL Image opened on the decompressed bytes held in memory
+    """
+    with open(fp_src, 'rb') as f_bz2:
+        buf = io.BytesIO(bz2.decompress(f_bz2.read()))
+    return Image.open(buf)
+
+
+
+"""
+
+A breakdown of the images by pose is:
+ Pose Angle Images Subjects
+ fa 0 1364 994
+ fb 0 1358 993
+ hl +67.5 1267 917
+ hr -67.5 1320 953
+ pl +90 1312 960
+ pr -90 1363 994
+ ql +22.5 761 501
+ qr -22.5 761 501
+ ra +45 321 261
+ rb +15 321 261
+ rc -15 610 423
+ rd -45 290 236
+ re -75 290 236
+
+ There are 13 different poses. (The orientation "right" means
+facing the photographer's right.)
+ fa regular frontal image
+ fb alternative frontal image, taken shortly after the
+ corresponding fa image
+ pl profile left
+ hl half left - head turned about 67.5 degrees left
+ ql quarter left - head turned about 22.5 degrees left
+ pr profile right
+ hr half right - head turned about 67.5 degrees right
+ qr quarter right - head turned about 22.5 degrees right
+ ra random image - head turned about 45 degrees left
+ rb random image - head turned about 15 degrees left
+ rc random image - head turned about 15 degrees right
+ rd random image - head turned about 45 degrees right
+ re random image - head turned about 75 degrees right
+
+""" \ No newline at end of file
diff --git a/megapixels/commands/datasets/s3.py b/megapixels/commands/datasets/s3.py
new file mode 100644
index 00000000..7769896b
--- /dev/null
+++ b/megapixels/commands/datasets/s3.py
@@ -0,0 +1,47 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+# S3 destination root for each value accepted by --type
+s3_dirs = {'media': cfg.S3_MEDIA_ROOT, 'metadata': cfg.S3_METADATA_ROOT}
+
+
+@click.command()
+@click.option('-i', '--input', 'opt_fps_in', required=True, multiple=True,
+              help='Input directory')
+@click.option('--name', 'opt_dataset_name', required=True,
+              help='Dataset key (eg "lfw")')
+@click.option('-a', '--action', 'opt_action', type=click.Choice(['sync', 'put']), default='sync',
+              help='S3 action')
+@click.option('-t', '--type', 'opt_type', type=click.Choice(list(s3_dirs)), required=True,
+              help='S3 location')
+@click.option('--dry-run', 'opt_dryrun', is_flag=True, default=False)
+@click.pass_context
+def cli(ctx, opt_fps_in, opt_dataset_name, opt_action, opt_type, opt_dryrun):
+    """Syncs files with S3/spaces server"""
+
+    # click prints the docstring above as the command help, so the details
+    # are kept in comments:
+    #   ctx               click context (not used)
+    #   opt_fps_in        one or more local paths (directories or files)
+    #   opt_dataset_name  sub-directory appended to the S3 root
+    #   opt_action        s3cmd sub-command, 'sync' or 'put'
+    #   opt_type          key of s3_dirs selecting the S3 root
+    #   opt_dryrun        only log the s3cmd commands, do not run them
+
+    from os.path import join
+    from pathlib import Path
+    import subprocess
+
+    from app.utils import logger_utils
+
+    # -------------------------------------------------
+    # init here
+
+    log = logger_utils.Logger.getLogger()
+
+    # the destination depends only on the options, so build it once; the
+    # empty last component makes join() end the path with a separator
+    dir_dst = join(s3_dirs[opt_type], opt_dataset_name, '')
+
+    for opt_fp_in in opt_fps_in:
+        # add a trailing slash to directories, pass file paths through untouched
+        fp_src = join(opt_fp_in, '') if Path(opt_fp_in).is_dir() else opt_fp_in
+        cmd = ['s3cmd', opt_action, fp_src, dir_dst, '-P', '--follow-symlinks']
+        log.info(' '.join(cmd))
+        if opt_dryrun:
+            continue
+        # report a failed transfer instead of silently ignoring the exit code
+        returncode = subprocess.call(cmd)
+        if returncode != 0:
+            log.error('s3cmd exited with code {}: {}'.format(returncode, fp_src))
+
+ \ No newline at end of file
diff --git a/megapixels/commands/datasets/symlink.py b/megapixels/commands/datasets/symlink.py
new file mode 100644
index 00000000..70ec6c46
--- /dev/null
+++ b/megapixels/commands/datasets/symlink.py
@@ -0,0 +1,45 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+              help='Input records CSV')
+@click.option('-m', '--media', 'opt_fp_media', required=True,
+              help='Input media directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+              help='Output directory')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_media, opt_fp_out):
+    """Symlinks images to new directory for S3"""
+
+    # click prints the docstring above as the command help, so the details
+    # are kept in comments:
+    #   ctx           click context (not used)
+    #   opt_fp_in     records CSV; the columns subdir, fn, ext and uuid are read
+    #   opt_fp_media  directory holding <subdir>/<fn>.<ext>
+    #   opt_fp_out    directory that receives the <uuid>.<ext> symlinks
+
+    from os.path import abspath, join
+    from pathlib import Path
+
+    from tqdm import tqdm
+    import pandas as pd
+
+    from app.utils import logger_utils, file_utils
+
+    # -------------------------------------------------
+    # init here
+
+    log = logger_utils.Logger.getLogger()
+
+    df_records = pd.read_csv(opt_fp_in)
+    nrows = len(df_records)
+
+    file_utils.mkdirs(opt_fp_out)
+
+    # iterrows() already yields the row, so there is no need to look it up again
+    for _, row in tqdm(df_records.iterrows(), total=nrows):
+        # make image paths
+        fn_src = '{}.{}'.format(row['fn'], row['ext'])
+        fn_dst = '{}.{}'.format(row['uuid'], row['ext'])
+        # a relative link target is resolved against the directory of the link,
+        # not the working directory, so always point at the absolute source path
+        fpp_src = Path(abspath(join(opt_fp_media, row['subdir'], fn_src)))
+        fpp_dst = Path(join(opt_fp_out, fn_dst))
+        fpp_dst.symlink_to(fpp_src)
+
+    log.info('symlinked {:,} files'.format(nrows))
\ No newline at end of file
diff --git a/megapixels/commands/datasets/vecs_to_id.py b/megapixels/commands/datasets/vecs_to_id.py
new file mode 100644
index 00000000..07c7389e
--- /dev/null
+++ b/megapixels/commands/datasets/vecs_to_id.py
@@ -0,0 +1,50 @@
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+              help='Input face vectors CSV')
+@click.option('-r', '--records', 'opt_fp_records', required=True,
+              help='Input records CSV')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+              help='Output JSON')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+              help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out,opt_force):
+    """Merges ID with face vectors"""
+
+    # click prints the docstring above as the command help, so the details
+    # are kept in comments:
+    #   ctx             click context (not used)
+    #   opt_fp_in       CSV of face vectors; the columns id and vec are read
+    #   opt_fp_records  records CSV; accepted for CLI compatibility, its
+    #                   contents are not needed to build the output
+    #   opt_fp_out      JSON file to write
+    #   opt_force       NOTE(review): accepted but not checked, an existing
+    #                   output is always replaced - confirm this is intended
+
+    from tqdm import tqdm
+    import pandas as pd
+
+    from app.utils import logger_utils, file_utils
+
+    # -------------------------------------------------
+    # init here
+
+    # NOTE(review): not used below; kept in case getLogger() sets up the
+    # logging that file_utils relies on - confirm
+    log = logger_utils.Logger.getLogger()
+
+    df_vecs = pd.read_csv(opt_fp_in)
+
+    # face vecs: record id -> vector, kept as the list of strings found in
+    # the CSV; a repeated id keeps its last vector
+    id_vecs = {}
+    rows = zip(df_vecs['id'], df_vecs['vec'])
+    for record_id, vec in tqdm(rows, total=len(df_vecs)):
+        id_vecs[int(record_id)] = vec.split(',')
+
+    # save as JSON
+    file_utils.write_json(id_vecs, opt_fp_out, verbose=True)
+
+ \ No newline at end of file
diff --git a/megapixels/commands/datasets/vecs_to_uuid.py b/megapixels/commands/datasets/vecs_to_uuid.py
new file mode 100644
index 00000000..7bb82083
--- /dev/null
+++ b/megapixels/commands/datasets/vecs_to_uuid.py
@@ -0,0 +1,56 @@
+"""
+Merge record UUIDs with face vectors
+"""
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+              help='Input face vectors CSV')
+@click.option('-r', '--records', 'opt_fp_records', required=True,
+              help='Input records CSV')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+              help='Output JSON')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+              help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out,opt_force):
+    """Merges UUID with face vectors"""
+
+    # click prints the docstring above as the command help, so the details
+    # are kept in comments:
+    #   ctx             click context (not used)
+    #   opt_fp_in       CSV of face vectors; the columns id and vec are read
+    #   opt_fp_records  records CSV; the column uuid is read
+    #   opt_fp_out      JSON file to write
+    #   opt_force       NOTE(review): accepted but not checked, an existing
+    #                   output is always replaced - confirm this is intended
+
+    from tqdm import tqdm
+    import pandas as pd
+
+    from app.utils import logger_utils, file_utils
+
+    # -------------------------------------------------
+    # init here
+
+    # NOTE(review): not used below; kept in case getLogger() sets up the
+    # logging that file_utils relies on - confirm
+    log = logger_utils.Logger.getLogger()
+
+    df_vecs = pd.read_csv(opt_fp_in)
+    df_records = pd.read_csv(opt_fp_records)
+
+    # the vector "id" is used as the row position in the records CSV; pull
+    # the uuid column out once instead of building a row Series per vector
+    record_uuids = df_records['uuid'].tolist()
+
+    # face vecs: uuid -> vector, kept as the list of strings found in the CSV
+    uuid_vecs = {}
+    rows = zip(df_vecs['id'], df_vecs['vec'])
+    for record_id, vec in tqdm(rows, total=len(df_vecs)):
+        uuid_vecs[record_uuids[int(record_id)]] = vec.split(',')
+
+    # save as JSON
+    file_utils.write_json(uuid_vecs, opt_fp_out)
+
+ \ No newline at end of file