From 5891e2f13ae9dfead0e1794c399e5ff813e694d3 Mon Sep 17 00:00:00 2001 From: adamhrv Date: Fri, 14 Dec 2018 02:06:39 +0100 Subject: added FR demo notebook --- megapixels/commands/cv/_old_files_to_face_rois.py | 168 ++++++++++++++++++++++ megapixels/commands/cv/embeddings.py | 100 ------------- megapixels/commands/cv/face_vec_to_csv.py | 110 ++++++++++++++ megapixels/commands/cv/faces_to_csv.py | 168 ---------------------- megapixels/commands/cv/faces_to_csv_indexed.py | 156 -------------------- megapixels/commands/cv/files_to_rois.py | 156 ++++++++++++++++++++ megapixels/commands/datasets/file_meta.py | 84 +++++++++++ megapixels/commands/datasets/sha256.py | 55 ++++--- 8 files changed, 545 insertions(+), 452 deletions(-) create mode 100644 megapixels/commands/cv/_old_files_to_face_rois.py delete mode 100644 megapixels/commands/cv/embeddings.py create mode 100644 megapixels/commands/cv/face_vec_to_csv.py delete mode 100644 megapixels/commands/cv/faces_to_csv.py delete mode 100644 megapixels/commands/cv/faces_to_csv_indexed.py create mode 100644 megapixels/commands/cv/files_to_rois.py create mode 100644 megapixels/commands/datasets/file_meta.py (limited to 'megapixels/commands') diff --git a/megapixels/commands/cv/_old_files_to_face_rois.py b/megapixels/commands/cv/_old_files_to_face_rois.py new file mode 100644 index 00000000..d92cbd74 --- /dev/null +++ b/megapixels/commands/cv/_old_files_to_face_rois.py @@ -0,0 +1,168 @@ +""" +Crop images to prepare for training +""" + +import click +# from PIL import Image, ImageOps, ImageFilter, ImageDraw + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg + +color_filters = {'color': 1, 'gray': 2, 'all': 3} + +@click.command() +@click.option('-i', '--input', 'opt_fp_files', required=True, + help='Input file meta CSV') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output CSV') +@click.option('-e', '--ext', 'opt_ext', + default='jpg', type=click.Choice(['jpg', 'png']), + help='File glob ext') +@click.option('--size', 'opt_size', + type=(int, int), default=(300, 300), + help='Output image size') +@click.option('-t', '--detector-type', 'opt_detector_type', + type=cfg.FaceDetectNetVar, + default=click_utils.get_default(types.FaceDetectNet.DLIB_CNN), + help=click_utils.show_help(types.FaceDetectNet)) +@click.option('-g', '--gpu', 'opt_gpu', default=0, + help='GPU index') +@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1), + help='Confidence minimum threshold') +@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4), + help='Number pyramids to upscale for DLIB detectors') +@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), + help='Slice list of files') +@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False, + help='Display detections to debug') +@click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False, + help='Use glob recursion (slower)') +@click.option('-f', '--force', 'opt_force', is_flag=True, + help='Force overwrite file') +@click.option('--color', 'opt_color_filter', + type=click.Choice(color_filters.keys()), default='color', + help='Filter to keep color or grayscale images (color = keep color') +@click.pass_context +def cli(ctx, opt_dirs_in, opt_fp_out, opt_ext, opt_size, opt_detector_type, + opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display, opt_recursive, opt_force, opt_color_filter): + """Converts frames 
with faces to CSV of ROIs""" + + import sys + import os + from os.path import join + from pathlib import Path + from glob import glob + + from tqdm import tqdm + import numpy as np + import dlib # must keep a local reference for dlib + import cv2 as cv + import pandas as pd + + from app.utils import logger_utils, file_utils, im_utils + from app.processors import face_detector + + # ------------------------------------------------- + # init here + + log = logger_utils.Logger.getLogger() + + if not opt_force and Path(opt_fp_out).exists(): + log.error('File exists. Use "-f / --force" to overwite') + return + + if opt_detector_type == types.FaceDetectNet.CVDNN: + detector = face_detector.DetectorCVDNN() + elif opt_detector_type == types.FaceDetectNet.DLIB_CNN: + detector = face_detector.DetectorDLIBCNN(opt_gpu) + elif opt_detector_type == types.FaceDetectNet.DLIB_HOG: + detector = face_detector.DetectorDLIBHOG() + elif opt_detector_type == types.FaceDetectNet.MTCNN: + detector = face_detector.DetectorMTCNN() + elif opt_detector_type == types.FaceDetectNet.HAAR: + log.error('{} not yet implemented'.format(opt_detector_type.name)) + return + + + # ------------------------------------------------- + # process here + color_filter = color_filters[opt_color_filter] + + # get list of files to process + fp_ims = [] + for opt_dir_in in opt_dirs_in: + if opt_recursive: + fp_glob = join(opt_dir_in, '**/*.{}'.format(opt_ext)) + fp_ims += glob(fp_glob, recursive=True) + else: + fp_glob = join(opt_dir_in, '*.{}'.format(opt_ext)) + fp_ims += glob(fp_glob) + log.debug(fp_glob) + + + if opt_slice: + fp_ims = fp_ims[opt_slice[0]:opt_slice[1]] + log.debug('processing {:,} files'.format(len(fp_ims))) + + + data = [] + + for fp_im in tqdm(fp_ims): + im = cv.imread(fp_im) + + # filter out color or grayscale iamges + if color_filter != color_filters['all']: + try: + is_gray = im_utils.is_grayscale(im) + if is_gray and color_filter != color_filters['gray']: + log.debug('Skipping grayscale image: {}'.format(fp_im)) + continue + except Exception as e: + log.error('Could not check grayscale: {}'.format(fp_im)) + continue + + try: + bboxes = detector.detect(im, opt_size=opt_size, opt_pyramids=opt_pyramids) + except Exception as e: + log.error('could not detect: {}'.format(fp_im)) + log.error('{}'.format(e)) + fpp_im = Path(fp_im) + subdir = str(fpp_im.parent.relative_to(opt_dir_in)) + + for bbox in bboxes: + # log.debug('is square: {}'.format(bbox.w == bbox.h)) + nw,nh = int(bbox.w * im.shape[1]), int(bbox.h * im.shape[0]) + roi = { + 'fn': fpp_im.stem, + 'ext': fpp_im.suffix.replace('.',''), + 'x': bbox.x, + 'y': bbox.y, + 'w': bbox.w, + 'h': bbox.h, + 'image_height': im.shape[0], + 'image_width': im.shape[1], + 'subdir': subdir} + bbox_dim = bbox.to_dim(im.shape[:2][::-1]) # w,h + data.append(roi) + + # debug display + if opt_display and len(bboxes): + im_md = im_utils.resize(im, width=min(1200, opt_size[0])) + for bbox in bboxes: + bbox_dim = bbox.to_dim(im_md.shape[:2][::-1]) + cv.rectangle(im_md, bbox_dim.pt_tl, bbox_dim.pt_br, (0,255,0), 3) + cv.imshow('', im_md) + while True: + k = cv.waitKey(1) & 0xFF + if k == 27 or k == ord('q'): # ESC + cv.destroyAllWindows() + sys.exit() + elif k != 255: + # any key to continue + break + + # save date + file_utils.mkdirs(opt_fp_out) + df = pd.DataFrame.from_dict(data) + df.to_csv(opt_fp_out, index=False) \ No newline at end of file diff --git a/megapixels/commands/cv/embeddings.py b/megapixels/commands/cv/embeddings.py deleted file mode 100644 index 9cb26ae7..00000000 --- 
a/megapixels/commands/cv/embeddings.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -Crop images to prepare for training -""" - -import click - -from app.settings import types -from app.utils import click_utils -from app.settings import app_cfg as cfg - -@click.command() -@click.option('-i', '--input', 'opt_fp_in', required=True, - help='Input directory') -@click.option('-r', '--records', 'opt_fp_records', required=True, - help='Input directory') -@click.option('-m', '--media', 'opt_fp_media', required=True, - help='Image directory') -@click.option('-o', '--output', 'opt_fp_out', required=True, - help='Output CSV') -@click.option('--size', 'opt_size', - type=(int, int), default=(300, 300), - help='Output image size') -@click.option('-g', '--gpu', 'opt_gpu', default=0, - help='GPU index') -@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), - help='Slice list of files') -@click.option('-f', '--force', 'opt_force', is_flag=True, - help='Force overwrite file') -@click.option('-j', '--jitters', 'opt_jitters', default=cfg.DLIB_FACEREC_JITTERS, - help='Number of jitters') -@click.option('-p', '--padding', 'opt_padding', default=cfg.DLIB_FACEREC_PADDING, - help='Percentage padding') -@click.pass_context -def cli(ctx, opt_fp_in, opt_fp_records, opt_fp_out, opt_fp_media, opt_size, opt_gpu, - opt_slice, opt_jitters, opt_padding, opt_force): - """Converts frames with faces to CSV of rows""" - - import sys - import os - from os.path import join - from pathlib import Path - - from tqdm import tqdm - import numpy as np - import dlib # must keep a local reference for dlib - import cv2 as cv - import dlib - import pandas as pd - - from app.utils import logger_utils, file_utils, im_utils - from app.models.bbox import BBox - from app.processors import face_recognition - - # ------------------------------------------------- - # init here - - log = logger_utils.Logger.getLogger() - - if not opt_force and Path(opt_fp_out).exists(): - log.error('File exists. 
Use "-f / --force" to overwite') - return - - # init dlib FR - facerec = face_recognition.RecognitionDLIB() - - # load data - df_rois = pd.read_csv(opt_fp_in) - df_records = pd.read_csv(opt_fp_records) - - if opt_slice: - df_rois = df_rois[opt_slice[0]:opt_slice[1]] - log.info('Processing {:,} rows'.format(len(df_rois))) - nrows = len(df_rois) - - # face vecs - vecs = [] - - for roi_idx, row in tqdm(df_rois.iterrows(), total=nrows): - # make image path - record_id = int(row['id']) - df = df_records.iloc[record_id] - fp_im = join(opt_fp_media, df['subdir'], '{}.{}'.format(df['fn'], df['ext'])) - # load image - im = cv.imread(fp_im) - # make bbox - xywh = [row['x'], row['y'], row['w'] , row['h']] - bbox = BBox.from_xywh(*xywh) - # scale to actual image size - dim = (row['image_width'], row['image_height']) - bbox_dim = bbox.to_dim(dim) - # compute vec - vec = facerec.vec(im, bbox_dim, jitters=opt_jitters, padding=opt_padding) - vec_str = ','.join([repr(x) for x in vec]) - vecs.append( {'id': row['id'], 'vec': vec_str}) - - # save data - file_utils.mkdirs(opt_fp_out) - df_vecs = pd.DataFrame.from_dict(vecs) - df_vecs.to_csv(opt_fp_out, index=False) - log.info('saved {:,} lines to {}'.format(len(df_vecs), opt_fp_out)) \ No newline at end of file diff --git a/megapixels/commands/cv/face_vec_to_csv.py b/megapixels/commands/cv/face_vec_to_csv.py new file mode 100644 index 00000000..6c9fad09 --- /dev/null +++ b/megapixels/commands/cv/face_vec_to_csv.py @@ -0,0 +1,110 @@ +""" +Converts ROIs to face vector +""" + +import click + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg + +@click.command() +@click.option('-i', '--input', 'opt_fp_files', required=True, + help='Input ROI CSV') +@click.option('-r', '--rois', 'opt_fp_rois', required=True, + help='Input ROI CSV') +@click.option('-m', '--media', 'opt_dir_media', required=True, + help='Input media directory') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output CSV') +@click.option('--size', 'opt_size', + type=(int, int), default=(300, 300), + help='Output image size') +@click.option('-j', '--jitters', 'opt_jitters', default=cfg.DLIB_FACEREC_JITTERS, + help='Number of jitters') +@click.option('-p', '--padding', 'opt_padding', default=cfg.DLIB_FACEREC_PADDING, + help='Percentage padding') +@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), + help='Slice list of files') +@click.option('-f', '--force', 'opt_force', is_flag=True, + help='Force overwrite file') +@click.option('-g', '--gpu', 'opt_gpu', default=0, + help='GPU index') +@click.pass_context +def cli(ctx, opt_fp_files, opt_fp_rois, opt_dir_media, opt_fp_out, opt_size, + opt_slice, opt_force, opt_gpu, opt_jitters, opt_padding): + """Converts face ROIs to vectors""" + + import sys + import os + from os.path import join + from pathlib import Path + from glob import glob + + from tqdm import tqdm + import numpy as np + import dlib # must keep a local reference for dlib + import cv2 as cv + import pandas as pd + + from app.models.bbox import BBox + from app.utils import logger_utils, file_utils, im_utils + from app.processors import face_recognition + + + # ------------------------------------------------- + # init here + + log = logger_utils.Logger.getLogger() + + # init face processors + facerec = face_recognition.RecognitionDLIB() + + # load data + df_file_meta = pd.read_csv(opt_fp_files) + df_rois = pd.read_csv(opt_fp_rois) + + if not opt_force and Path(opt_fp_out).exists(): + 
log.error('File exists. Use "-f / --force" to overwite') + return + + if opt_slice: + df_rois = df_rois[opt_slice[0]:opt_slice[1]] + + # ------------------------------------------------- + # process here + + df_img_groups = df_rois.groupby('image_index') + log.debug('processing {:,} groups'.format(len(df_img_groups))) + + vecs = [] + + for image_index, df_img_group in tqdm(df_img_groups): + # make fp + roi_index = df_img_group.index.values[0] + file_meta = df_file_meta.iloc[image_index] # locate image meta + fp_im = join(opt_dir_media, file_meta.subdir, '{}.{}'.format(file_meta.fn, file_meta.ext)) + im = cv.imread(fp_im) + # get bbox + x = df_img_group.x.values[0] + y = df_img_group.y.values[0] + w = df_img_group.w.values[0] + h = df_img_group.h.values[0] + imw = df_img_group.image_width.values[0] + imh = df_img_group.image_height.values[0] + dim = im.shape[:2][::-1] + # get face vector + dim = (imw, imh) + bbox_dim = BBox.from_xywh(x, y, w, h).to_dim(dim) # convert to int real dimensions + # compute vec + # padding=opt_padding not yet implemented in 19.16 but merged in master + vec = facerec.vec(im, bbox_dim, jitters=opt_jitters) + vec_str = ','.join([repr(x) for x in vec]) # convert to string for CSV + vecs.append( {'roi_index': roi_index, 'image_index': image_index, 'vec': vec_str}) + + + # save date + file_utils.mkdirs(opt_fp_out) + df = pd.DataFrame.from_dict(vecs) + df.index.name = 'index' + df.to_csv(opt_fp_out) \ No newline at end of file diff --git a/megapixels/commands/cv/faces_to_csv.py b/megapixels/commands/cv/faces_to_csv.py deleted file mode 100644 index 1fd47571..00000000 --- a/megapixels/commands/cv/faces_to_csv.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Crop images to prepare for training -""" - -import click -# from PIL import Image, ImageOps, ImageFilter, ImageDraw - -from app.settings import types -from app.utils import click_utils -from app.settings import app_cfg as cfg - -color_filters = {'color': 1, 'gray': 2, 'all': 3} - -@click.command() -@click.option('-i', '--input', 'opt_dirs_in', required=True, multiple=True, - help='Input directory') -@click.option('-o', '--output', 'opt_fp_out', required=True, - help='Output CSV') -@click.option('-e', '--ext', 'opt_ext', - default='jpg', type=click.Choice(['jpg', 'png']), - help='File glob ext') -@click.option('--size', 'opt_size', - type=(int, int), default=(300, 300), - help='Output image size') -@click.option('-t', '--detector-type', 'opt_detector_type', - type=cfg.FaceDetectNetVar, - default=click_utils.get_default(types.FaceDetectNet.DLIB_CNN), - help=click_utils.show_help(types.FaceDetectNet)) -@click.option('-g', '--gpu', 'opt_gpu', default=0, - help='GPU index') -@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1), - help='Confidence minimum threshold') -@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4), - help='Number pyramids to upscale for DLIB detectors') -@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), - help='Slice list of files') -@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False, - help='Display detections to debug') -@click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False, - help='Use glob recursion (slower)') -@click.option('-f', '--force', 'opt_force', is_flag=True, - help='Force overwrite file') -@click.option('--color', 'opt_color_filter', - type=click.Choice(color_filters.keys()), default='color', - help='Filter to keep color or grayscale images 
(color = keep color') -@click.pass_context -def cli(ctx, opt_dirs_in, opt_fp_out, opt_ext, opt_size, opt_detector_type, - opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display, opt_recursive, opt_force, opt_color_filter): - """Converts frames with faces to CSV of ROIs""" - - import sys - import os - from os.path import join - from pathlib import Path - from glob import glob - - from tqdm import tqdm - import numpy as np - import dlib # must keep a local reference for dlib - import cv2 as cv - import pandas as pd - - from app.utils import logger_utils, file_utils, im_utils - from app.processors import face_detector - - # ------------------------------------------------- - # init here - - log = logger_utils.Logger.getLogger() - - if not opt_force and Path(opt_fp_out).exists(): - log.error('File exists. Use "-f / --force" to overwite') - return - - if opt_detector_type == types.FaceDetectNet.CVDNN: - detector = face_detector.DetectorCVDNN() - elif opt_detector_type == types.FaceDetectNet.DLIB_CNN: - detector = face_detector.DetectorDLIBCNN(opt_gpu) - elif opt_detector_type == types.FaceDetectNet.DLIB_HOG: - detector = face_detector.DetectorDLIBHOG() - elif opt_detector_type == types.FaceDetectNet.MTCNN: - detector = face_detector.DetectorMTCNN() - elif opt_detector_type == types.FaceDetectNet.HAAR: - log.error('{} not yet implemented'.format(opt_detector_type.name)) - return - - - # ------------------------------------------------- - # process here - color_filter = color_filters[opt_color_filter] - - # get list of files to process - fp_ims = [] - for opt_dir_in in opt_dirs_in: - if opt_recursive: - fp_glob = join(opt_dir_in, '**/*.{}'.format(opt_ext)) - fp_ims += glob(fp_glob, recursive=True) - else: - fp_glob = join(opt_dir_in, '*.{}'.format(opt_ext)) - fp_ims += glob(fp_glob) - log.debug(fp_glob) - - - if opt_slice: - fp_ims = fp_ims[opt_slice[0]:opt_slice[1]] - log.debug('processing {:,} files'.format(len(fp_ims))) - - - data = [] - - for fp_im in tqdm(fp_ims): - im = cv.imread(fp_im) - - # filter out color or grayscale iamges - if color_filter != color_filters['all']: - try: - is_gray = im_utils.is_grayscale(im) - if is_gray and color_filter != color_filters['gray']: - log.debug('Skipping grayscale image: {}'.format(fp_im)) - continue - except Exception as e: - log.error('Could not check grayscale: {}'.format(fp_im)) - continue - - try: - bboxes = detector.detect(im, opt_size=opt_size, opt_pyramids=opt_pyramids) - except Exception as e: - log.error('could not detect: {}'.format(fp_im)) - log.error('{}'.format(e)) - fpp_im = Path(fp_im) - subdir = str(fpp_im.parent.relative_to(opt_dir_in)) - - for bbox in bboxes: - # log.debug('is square: {}'.format(bbox.w == bbox.h)) - nw,nh = int(bbox.w * im.shape[1]), int(bbox.h * im.shape[0]) - roi = { - 'fn': fpp_im.stem, - 'ext': fpp_im.suffix.replace('.',''), - 'x': bbox.x, - 'y': bbox.y, - 'w': bbox.w, - 'h': bbox.h, - 'image_height': im.shape[0], - 'image_width': im.shape[1], - 'subdir': subdir} - bbox_dim = bbox.to_dim(im.shape[:2][::-1]) # w,h - data.append(roi) - - # debug display - if opt_display and len(bboxes): - im_md = im_utils.resize(im, width=min(1200, opt_size[0])) - for bbox in bboxes: - bbox_dim = bbox.to_dim(im_md.shape[:2][::-1]) - cv.rectangle(im_md, bbox_dim.pt_tl, bbox_dim.pt_br, (0,255,0), 3) - cv.imshow('', im_md) - while True: - k = cv.waitKey(1) & 0xFF - if k == 27 or k == ord('q'): # ESC - cv.destroyAllWindows() - sys.exit() - elif k != 255: - # any key to continue - break - - # save date - 
file_utils.mkdirs(opt_fp_out) - df = pd.DataFrame.from_dict(data) - df.to_csv(opt_fp_out, index=False) \ No newline at end of file diff --git a/megapixels/commands/cv/faces_to_csv_indexed.py b/megapixels/commands/cv/faces_to_csv_indexed.py deleted file mode 100644 index ef958f89..00000000 --- a/megapixels/commands/cv/faces_to_csv_indexed.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -Crop images to prepare for training -""" - -import click -# from PIL import Image, ImageOps, ImageFilter, ImageDraw - -from app.settings import types -from app.utils import click_utils -from app.settings import app_cfg as cfg - -color_filters = {'color': 1, 'gray': 2, 'all': 3} - -@click.command() -@click.option('-i', '--input', 'opt_fp_in', required=True, - help='Input CSV (eg image_files.csv)') -@click.option('-m', '--media', 'opt_dir_media', required=True, - help='Input media directory') -@click.option('-o', '--output', 'opt_fp_out', required=True, - help='Output CSV') -@click.option('--size', 'opt_size', - type=(int, int), default=(300, 300), - help='Output image size') -@click.option('-t', '--detector-type', 'opt_detector_type', - type=cfg.FaceDetectNetVar, - default=click_utils.get_default(types.FaceDetectNet.DLIB_CNN), - help=click_utils.show_help(types.FaceDetectNet)) -@click.option('-g', '--gpu', 'opt_gpu', default=0, - help='GPU index') -@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1), - help='Confidence minimum threshold') -@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4), - help='Number pyramids to upscale for DLIB detectors') -@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), - help='Slice list of files') -@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False, - help='Display detections to debug') -@click.option('-f', '--force', 'opt_force', is_flag=True, - help='Force overwrite file') -@click.option('--color', 'opt_color_filter', - type=click.Choice(color_filters.keys()), default='all', - help='Filter to keep color or grayscale images (color = keep color') -@click.option('--largest', 'opt_largest', is_flag=True, - help='Only keep largest face') -@click.pass_context -def cli(ctx, opt_fp_in, opt_dir_media, opt_fp_out, opt_size, opt_detector_type, - opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display, opt_force, opt_color_filter, - opt_largest): - """Converts frames with faces to CSV of ROIs""" - - import sys - import os - from os.path import join - from pathlib import Path - from glob import glob - - from tqdm import tqdm - import numpy as np - import dlib # must keep a local reference for dlib - import cv2 as cv - import pandas as pd - - from app.utils import logger_utils, file_utils, im_utils - from app.processors import face_detector - - # ------------------------------------------------- - # init here - - log = logger_utils.Logger.getLogger() - - if not opt_force and Path(opt_fp_out).exists(): - log.error('File exists. 
Use "-f / --force" to overwite') - return - - if opt_detector_type == types.FaceDetectNet.CVDNN: - detector = face_detector.DetectorCVDNN() - elif opt_detector_type == types.FaceDetectNet.DLIB_CNN: - detector = face_detector.DetectorDLIBCNN(opt_gpu) - elif opt_detector_type == types.FaceDetectNet.DLIB_HOG: - detector = face_detector.DetectorDLIBHOG() - elif opt_detector_type == types.FaceDetectNet.MTCNN: - detector = face_detector.DetectorMTCNN() - elif opt_detector_type == types.FaceDetectNet.HAAR: - log.error('{} not yet implemented'.format(opt_detector_type.name)) - return - - - # ------------------------------------------------- - # process here - color_filter = color_filters[opt_color_filter] - - # get list of files to process - df_files = pd.read_csv(opt_fp_in).set_index('index') - - if opt_slice: - df_files = df_files[opt_slice[0]:opt_slice[1]] - log.debug('processing {:,} files'.format(len(df_files))) - - - data = [] - - for df_file in tqdm(df_files.itertuples(), total=len(df_files)): - fp_im = join(opt_dir_media, df_file.subdir, '{}.{}'.format(df_file.fn, df_file.ext)) - im = cv.imread(fp_im) - - # filter out color or grayscale iamges - if color_filter != color_filters['all']: - try: - is_gray = im_utils.is_grayscale(im) - if is_gray and color_filter != color_filters['gray']: - log.debug('Skipping grayscale image: {}'.format(fp_im)) - continue - except Exception as e: - log.error('Could not check grayscale: {}'.format(fp_im)) - continue - - try: - bboxes = detector.detect(im, opt_size=opt_size, opt_pyramids=opt_pyramids, opt_largest=opt_largest) - except Exception as e: - log.error('could not detect: {}'.format(fp_im)) - log.error('{}'.format(e)) - continue - - for bbox in bboxes: - roi = { - 'image_index': int(df_file.Index), - 'x': bbox.x, - 'y': bbox.y, - 'w': bbox.w, - 'h': bbox.h, - 'image_width': im.shape[1], - 'image_height': im.shape[0]} - data.append(roi) - - # debug display - if opt_display and len(bboxes): - bbox_dim = bbox.to_dim(im.shape[:2][::-1]) # w,h - im_md = im_utils.resize(im, width=min(1200, opt_size[0])) - for bbox in bboxes: - bbox_dim = bbox.to_dim(im_md.shape[:2][::-1]) - cv.rectangle(im_md, bbox_dim.pt_tl, bbox_dim.pt_br, (0,255,0), 3) - cv.imshow('', im_md) - while True: - k = cv.waitKey(1) & 0xFF - if k == 27 or k == ord('q'): # ESC - cv.destroyAllWindows() - sys.exit() - elif k != 255: - # any key to continue - break - - # save date - file_utils.mkdirs(opt_fp_out) - df = pd.DataFrame.from_dict(data) - df.index.name = 'index' - df.to_csv(opt_fp_out) \ No newline at end of file diff --git a/megapixels/commands/cv/files_to_rois.py b/megapixels/commands/cv/files_to_rois.py new file mode 100644 index 00000000..1aaf991c --- /dev/null +++ b/megapixels/commands/cv/files_to_rois.py @@ -0,0 +1,156 @@ +""" +Crop images to prepare for training +""" + +import click +# from PIL import Image, ImageOps, ImageFilter, ImageDraw + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg + +color_filters = {'color': 1, 'gray': 2, 'all': 3} + +@click.command() +@click.option('-i', '--input', 'opt_fp_in', required=True, + help='Input CSV (eg image_files.csv)') +@click.option('-m', '--media', 'opt_dir_media', required=True, + help='Input media directory') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output CSV') +@click.option('--size', 'opt_size', + type=(int, int), default=(300, 300), + help='Output image size') +@click.option('-t', '--detector-type', 'opt_detector_type', + 
type=cfg.FaceDetectNetVar, + default=click_utils.get_default(types.FaceDetectNet.DLIB_CNN), + help=click_utils.show_help(types.FaceDetectNet)) +@click.option('-g', '--gpu', 'opt_gpu', default=0, + help='GPU index') +@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1), + help='Confidence minimum threshold') +@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4), + help='Number pyramids to upscale for DLIB detectors') +@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), + help='Slice list of files') +@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False, + help='Display detections to debug') +@click.option('-f', '--force', 'opt_force', is_flag=True, + help='Force overwrite file') +@click.option('--color', 'opt_color_filter', + type=click.Choice(color_filters.keys()), default='all', + help='Filter to keep color or grayscale images (color = keep color') +@click.option('--largest', 'opt_largest', is_flag=True, + help='Only keep largest face') +@click.pass_context +def cli(ctx, opt_fp_in, opt_dir_media, opt_fp_out, opt_size, opt_detector_type, + opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display, opt_force, opt_color_filter, + opt_largest): + """Converts frames with faces to CSV of ROIs""" + + import sys + import os + from os.path import join + from pathlib import Path + from glob import glob + + from tqdm import tqdm + import numpy as np + import dlib # must keep a local reference for dlib + import cv2 as cv + import pandas as pd + + from app.utils import logger_utils, file_utils, im_utils + from app.processors import face_detector + + # ------------------------------------------------- + # init here + + log = logger_utils.Logger.getLogger() + + if not opt_force and Path(opt_fp_out).exists(): + log.error('File exists. 
Use "-f / --force" to overwite') + return + + if opt_detector_type == types.FaceDetectNet.CVDNN: + detector = face_detector.DetectorCVDNN() + elif opt_detector_type == types.FaceDetectNet.DLIB_CNN: + detector = face_detector.DetectorDLIBCNN(opt_gpu) + elif opt_detector_type == types.FaceDetectNet.DLIB_HOG: + detector = face_detector.DetectorDLIBHOG() + elif opt_detector_type == types.FaceDetectNet.MTCNN: + detector = face_detector.DetectorMTCNN() + elif opt_detector_type == types.FaceDetectNet.HAAR: + log.error('{} not yet implemented'.format(opt_detector_type.name)) + return + + + # ------------------------------------------------- + # process here + color_filter = color_filters[opt_color_filter] + + # get list of files to process + df_files = pd.read_csv(opt_fp_in).set_index('index') + + if opt_slice: + df_files = df_files[opt_slice[0]:opt_slice[1]] + log.debug('processing {:,} files'.format(len(df_files))) + + + data = [] + + for df_file in tqdm(df_files.itertuples(), total=len(df_files)): + fp_im = join(opt_dir_media, str(df_file.subdir), f'{df_file.fn}.{df_file.ext}') + im = cv.imread(fp_im) + + # filter out color or grayscale iamges + if color_filter != color_filters['all']: + try: + is_gray = im_utils.is_grayscale(im) + if is_gray and color_filter != color_filters['gray']: + log.debug('Skipping grayscale image: {}'.format(fp_im)) + continue + except Exception as e: + log.error('Could not check grayscale: {}'.format(fp_im)) + continue + + try: + bboxes = detector.detect(im, size=opt_size, pyramids=opt_pyramids, largest=opt_largest) + except Exception as e: + log.error('could not detect: {}'.format(fp_im)) + log.error('{}'.format(e)) + continue + + for bbox in bboxes: + roi = { + 'image_index': int(df_file.Index), + 'x': bbox.x, + 'y': bbox.y, + 'w': bbox.w, + 'h': bbox.h, + 'image_width': im.shape[1], + 'image_height': im.shape[0]} + data.append(roi) + + # debug display + if opt_display and len(bboxes): + bbox_dim = bbox.to_dim(im.shape[:2][::-1]) # w,h + im_md = im_utils.resize(im, width=min(1200, opt_size[0])) + for bbox in bboxes: + bbox_dim = bbox.to_dim(im_md.shape[:2][::-1]) + cv.rectangle(im_md, bbox_dim.pt_tl, bbox_dim.pt_br, (0,255,0), 3) + cv.imshow('', im_md) + while True: + k = cv.waitKey(1) & 0xFF + if k == 27 or k == ord('q'): # ESC + cv.destroyAllWindows() + sys.exit() + elif k != 255: + # any key to continue + break + + # save date + file_utils.mkdirs(opt_fp_out) + df = pd.DataFrame.from_dict(data) + df.index.name = 'index' + df.to_csv(opt_fp_out) \ No newline at end of file diff --git a/megapixels/commands/datasets/file_meta.py b/megapixels/commands/datasets/file_meta.py new file mode 100644 index 00000000..e1456f44 --- /dev/null +++ b/megapixels/commands/datasets/file_meta.py @@ -0,0 +1,84 @@ +""" +Begin with this file to process folder of images +- Converts folders and subdirectories into CSV with file attributes split +""" +import click + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg +from app.utils.logger_utils import Logger + +log = Logger.getLogger() + +@click.command() +@click.option('-i', '--input', 'opt_fp_in', required=True, + help='Input directory') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output file for file meta CSV') +@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None), + help='Slice list of files') +@click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False, + help='Use glob recursion (slower)') 
+@click.option('-t', '--threads', 'opt_threads', default=4,
+  help='Number of threads')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_recursive, opt_threads, opt_force):
+  """Converts a folder of images into an indexed CSV of file attributes (subdir, fn, ext)"""
+
+  from glob import glob
+  from os.path import join
+  from pathlib import Path
+  import time
+  from multiprocessing.dummy import Pool as ThreadPool
+  import random
+
+  import pandas as pd
+  from tqdm import tqdm
+  from glob import glob
+
+  from app.utils import file_utils, im_utils
+
+
+  if not opt_force and Path(opt_fp_out).exists():
+    log.error('File exists. Use "-f / --force" to overwrite')
+    return
+
+  fp_ims = []
+  log.info(f'Globbing {opt_fp_in}')
+  for ext in ['jpg', 'png']:
+    if opt_recursive:
+      fp_glob = join(opt_fp_in, '**/*.{}'.format(ext))
+      fp_ims += glob(fp_glob, recursive=True)
+    else:
+      fp_glob = join(opt_fp_in, '*.{}'.format(ext))
+      fp_ims += glob(fp_glob)
+
+  if not fp_ims:
+    log.warn('No images. Try with "--recursive"')
+    return
+
+  if opt_slice:
+    fp_ims = fp_ims[opt_slice[0]:opt_slice[1]]
+
+  log.info('Processing {:,} images'.format(len(fp_ims)))
+
+
+  # convert data to dict
+  data = []
+  for i, fp_im in enumerate(tqdm(fp_ims)):
+    fpp_im = Path(fp_im)
+    subdir = str(fpp_im.parent.relative_to(opt_fp_in))
+    data.append( {
+      'subdir': subdir,
+      'fn': fpp_im.stem,
+      'ext': fpp_im.suffix.replace('.','')
+      })
+
+  # save to CSV
+  file_utils.mkdirs(opt_fp_out)
+  df = pd.DataFrame.from_dict(data)
+  df.index.name = 'index'
+  df.to_csv(opt_fp_out)
\ No newline at end of file
diff --git a/megapixels/commands/datasets/sha256.py b/megapixels/commands/datasets/sha256.py
index c04fb504..4c734073 100644
--- a/megapixels/commands/datasets/sha256.py
+++ b/megapixels/commands/datasets/sha256.py
@@ -10,18 +10,18 @@ log = Logger.getLogger()
 @click.command()
 @click.option('-i', '--input', 'opt_fp_in', required=True,
   help='Input directory')
-@click.option('-o', '--output', 'opt_fp_out',
+@click.option('-m', '--media', 'opt_dir_media', required=True,
+  help='Input media directory')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
   help='Output directory')
 @click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
   help='Slice list of files')
-@click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False,
-  help='Use glob recursion (slower)')
 @click.option('-t', '--threads', 'opt_threads', default=4,
   help='Number of threads')
 @click.option('-f', '--force', 'opt_force', is_flag=True,
   help='Force overwrite file')
 @click.pass_context
-def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_recursive, opt_threads, opt_force):
+def cli(ctx, opt_fp_in, opt_dir_media, opt_fp_out, opt_slice, opt_threads, opt_force):
   """Multithreading test"""
 
   from glob import glob
@@ -42,47 +42,46 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_recursive, opt_threads, opt_f
   log.error('File exists. 
Use "-f / --force" to overwite') return - fp_ims = [] - for ext in ['jpg', 'png']: - if opt_recursive: - fp_glob = join(opt_fp_in, '**/*.{}'.format(ext)) - fp_ims += glob(fp_glob, recursive=True) - else: - fp_glob = join(opt_fp_in, '*.{}'.format(ext)) - fp_ims += glob(fp_glob) + df_files = pd.read_csv(opt_fp_in).set_index('index') if opt_slice: - fp_ims = fp_ims[opt_slice[0]:opt_slice[1]] + df_files = df_files[opt_slice[0]:opt_slice[1]] - log.info('Processing {:,} images'.format(len(fp_ims))) + log.info('Processing {:,} images'.format(len(df_files))) - pbar = tqdm(total=100) + + # prepare list of images to multithread into sha256s + file_objs = [] + for ds_file in df_files.itertuples(): + fp_im = join(opt_dir_media, str(ds_file.subdir), f"{ds_file.fn}.{ds_file.ext}") + file_objs.append({'fp': fp_im, 'index': ds_file.Index}) + + # convert to thread pool + pbar = tqdm(total=len(file_objs)) - def as_sha256(fp_im): + def as_sha256(file_obj): pbar.update(1) - return file_utils.sha256(fp_im) + file_obj['sha256'] = file_utils.sha256(file_obj['fp']) + return file_obj # multithread pool + pool_file_objs = [] st = time.time() pool = ThreadPool(opt_threads) - with tqdm(total=len(fp_ims)) as pbar: - sha256s = pool.map(as_sha256, fp_ims) + with tqdm(total=len(file_objs)) as pbar: + pool_file_objs = pool.map(as_sha256, file_objs) pbar.close() - + # convert data to dict data = [] - for i, fp_im in enumerate(fp_ims): - fpp_im = Path(fp_im) - subdir = str(fpp_im.parent.relative_to(opt_fp_in)) - sha256 = sha256s[i] + for pool_file_obj in pool_file_objs: data.append( { - 'sha256': sha256, - 'subdir': subdir, - 'fn': fpp_im.stem, - 'ext': fpp_im.suffix.replace('.','') + 'sha256': pool_file_obj['sha256'], + 'index': pool_file_obj['index'] }) # save to CSV + file_utils.mkdirs(opt_fp_out) df = pd.DataFrame.from_dict(data) df.to_csv(opt_fp_out, index=False) -- cgit v1.2.3-70-g09d2