author    adamhrv <adam@ahprojects.com>  2019-04-10 23:04:29 +0200
committer adamhrv <adam@ahprojects.com>  2019-04-10 23:04:29 +0200
commit    8032dc798287b0ae26342063c3016858f2b44974 (patch)
tree      c47bb7dc13b2dbba550fb627152f0089191f904b /megapixels
parent    05d96677b8f58780248e45e28510b3ef8ed1e1b9 (diff)
add body detector, mod pull sheet
Diffstat (limited to 'megapixels')
-rw-r--r--  megapixels/app/processors/person_detector.py      65
-rw-r--r--  megapixels/app/settings/app_cfg.py                  5
-rw-r--r--  megapixels/app/settings/types.py                    3
-rw-r--r--  megapixels/app/utils/display_utils.py               7
-rw-r--r--  megapixels/app/utils/identity_utils.py             19
-rw-r--r--  megapixels/commands/datasets/citations_to_csv.py   35
-rw-r--r--  megapixels/commands/datasets/pull_spreadsheet.py   25
-rw-r--r--  megapixels/commands/processor/body_roi_video.py   148
8 files changed, 283 insertions, 24 deletions
diff --git a/megapixels/app/processors/person_detector.py b/megapixels/app/processors/person_detector.py
new file mode 100644
index 00000000..6daa8c40
--- /dev/null
+++ b/megapixels/app/processors/person_detector.py
@@ -0,0 +1,65 @@
+import sys
+import os
+from os.path import join
+from pathlib import Path
+
+import cv2 as cv
+import numpy as np
+import imutils
+import operator
+
+from app.utils import im_utils, logger_utils
+from app.models.bbox import BBox
+from app.settings import app_cfg as cfg
+from app.settings import types
+
+
+class DetectorCVDNN:
+
+  # MobileNet SSD
+  dnn_scale = 0.007843  # fixed
+  dnn_mean = (127.5, 127.5, 127.5)  # fixed
+  dnn_crop = False  # crop or force resize
+  blob_size = (300, 300)
+  conf = 0.95
+
+  # detect
+  CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
+    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
+    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
+    "sofa", "train", "tvmonitor"]
+
+  def __init__(self):
+    self.log = logger_utils.Logger.getLogger()
+    fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'mobilenet_ssd', 'MobileNetSSD_deploy.prototxt')
+    fp_model = join(cfg.DIR_MODELS_CAFFE, 'mobilenet_ssd', 'MobileNetSSD_deploy.caffemodel')
+    self.net = cv.dnn.readNet(fp_prototxt, fp_model)
+    self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
+    self.net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
+
+  def detect(self, im, conf=None, largest=False, pyramids=None, zone=False, blob_size=None):
+    """Detects bodies and returns (list) of (BBox)"""
+    conf = self.conf if conf is None else conf
+    blob_size = self.blob_size if blob_size is None else blob_size
+    im = cv.resize(im, blob_size)
+    dim = im.shape[:2][::-1]
+    blob = cv.dnn.blobFromImage(im, self.dnn_scale, dim, self.dnn_mean)
+    self.net.setInput(blob)
+    net_outputs = self.net.forward()
+
+    bboxes = []
+    for i in range(0, net_outputs.shape[2]):
+      det_conf = float(net_outputs[0, 0, i, 2])
+      bounds = np.array(net_outputs[0, 0, i, 3:7])  # bug: ensure all x,y within 1.0 ?
+      if det_conf > conf and np.all(bounds < 1):
+        idx = int(net_outputs[0, 0, i, 1])
+        if self.CLASSES[idx] == "person":
+          rect_norm = net_outputs[0, 0, i, 3:7]
+          bboxes.append(BBox(*rect_norm))
+
+    if largest and len(bboxes) > 1:
+      # only keep largest
+      bboxes.sort(key=operator.attrgetter('area'), reverse=True)
+      bboxes = [bboxes[0]]
+
+    return bboxes
\ No newline at end of file
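
Below is a minimal usage sketch of the new detector (illustrative only, not part of this commit); the test image path and confidence value are placeholders, and it assumes the MobileNetSSD_deploy prototxt/caffemodel files are present under cfg.DIR_MODELS_CAFFE/mobilenet_ssd:

    import cv2 as cv
    from app.processors import person_detector

    detector = person_detector.DetectorCVDNN()
    im = cv.imread('frame.jpg')  # placeholder test image
    # detect() returns a list of BBox objects in normalized (0..1) coordinates
    bboxes = detector.detect(im, conf=0.85, largest=True)
    for bbox in bboxes:
      print(bbox.x, bbox.y, bbox.w, bbox.h)
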
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index 1eed1a41..98d36b5f 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -19,6 +19,7 @@ LogLevelVar = click_utils.ParamVar(types.LogLevel)
MetadataVar = click_utils.ParamVar(types.Metadata)
DatasetVar = click_utils.ParamVar(types.Dataset)
DataStoreVar = click_utils.ParamVar(types.DataStore)
+
# Face analysis
HaarCascadeVar = click_utils.ParamVar(types.HaarCascade)
FaceDetectNetVar = click_utils.ParamVar(types.FaceDetectNet)
@@ -27,6 +28,10 @@ FaceLandmark2D_5Var = click_utils.ParamVar(types.FaceLandmark2D_5)
FaceLandmark2D_68Var = click_utils.ParamVar(types.FaceLandmark2D_68)
FaceLandmark3D_68Var = click_utils.ParamVar(types.FaceLandmark3D_68)
+# Person/Body detector
+BodyDetectNetVar = click_utils.ParamVar(types.BodyDetectNet)
+
+
# base path
DIR_SELF = os.path.dirname(os.path.realpath(__file__))
DIR_ROOT = Path(DIR_SELF).parent.parent.parent
diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py
index 3d7e96c0..2609ece7 100644
--- a/megapixels/app/settings/types.py
+++ b/megapixels/app/settings/types.py
@@ -59,6 +59,9 @@ class FaceDetectNet(Enum):
"""Scene text detector networks"""
HAAR, DLIB_CNN, DLIB_HOG, CVDNN, MTCNN_TF, MTCNN_PT, MTCNN_CAFFE = range(7)
+class BodyDetectNet(Enum):
+ CVDNN = range(1)
+
class FaceExtractor(Enum):
"""Type of face recognition feature extractor"""
# TODO deprecate DLIB resnet and use only CVDNN Caffe models
diff --git a/megapixels/app/utils/display_utils.py b/megapixels/app/utils/display_utils.py
index 43328ae9..8e265ae7 100644
--- a/megapixels/app/utils/display_utils.py
+++ b/megapixels/app/utils/display_utils.py
@@ -19,3 +19,10 @@ def handle_keyboard(delay_amt=1):
      break
    elif k != 255:
      log.debug(f'k: {k}')
+
+def handle_keyboard_video(delay_amt=1):
+  key = cv.waitKey(1) & 0xFF
+  # if the `q` key was pressed, break from the loop
+  if key == ord("q"):
+    cv.destroyAllWindows()
+    sys.exit()
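
A minimal sketch of how the new helper might sit in a preview loop (the window name and video path are placeholders, not from this commit):

    import cv2 as cv
    from app.utils import display_utils

    cap = cv.VideoCapture('video.mp4')  # placeholder video
    while cap.isOpened():
      ok, frame = cap.read()
      if not ok:
        break
      cv.imshow('preview', frame)
      display_utils.handle_keyboard_video()  # quits the script when 'q' is pressed
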
diff --git a/megapixels/app/utils/identity_utils.py b/megapixels/app/utils/identity_utils.py
index 775652dc..5855fbbd 100644
--- a/megapixels/app/utils/identity_utils.py
+++ b/megapixels/app/utils/identity_utils.py
@@ -29,6 +29,13 @@ def names_match_strict(a, b):
  return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a)
+def sanitize_name(name, as_str=False):
+  splits = [unidecode.unidecode(x.strip().lower()) for x in name.strip().split(' ')]
+  if as_str:
+    return ' '.join(splits)
+  else:
+    return splits
+
'''
class Dataset(Enum):
  LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
@@ -106,12 +113,18 @@ def get_names(opt_dataset, opt_data_store=types.DataStore.HDD):
def similarity(a, b):
  return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio()
-def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False):
+def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False, name_a_pre=False, name_b_pre=False):
  '''Returns boolean if names are similar enough
  '''
  # strip spaces and split names into list of plain text words
-  name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
-  name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
+  if name_a_pre:
+    name_a_clean = name_a
+  else:
+    name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
+  if name_b_pre:
+    name_b_clean = name_b
+  else:
+    name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
  # assign short long vars
  len_a = len(name_a_clean)
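
The intent of the new flags is to let callers sanitize once and reuse the result; a hypothetical example (names invented for illustration), assuming sanitize_name() and names_match() are imported from this module:

    from app.utils import identity_utils

    query = identity_utils.sanitize_name('Ada Lovelace')  # ['ada', 'lovelace']
    for candidate in ['Ada  Lovelace', 'A. Lovelace']:
      cand = identity_utils.sanitize_name(candidate)
      # pre-cleaned lists skip the per-call unidecode/strip/split work
      if identity_utils.names_match(query, cand, name_a_pre=True, name_b_pre=True):
        print(candidate)
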
diff --git a/megapixels/commands/datasets/citations_to_csv.py b/megapixels/commands/datasets/citations_to_csv.py
index c6a04bd4..f3277d7e 100644
--- a/megapixels/commands/datasets/citations_to_csv.py
+++ b/megapixels/commands/datasets/citations_to_csv.py
@@ -35,9 +35,12 @@ def cli(ctx, opt_fp_in, opt_dir_out):
  else:
    fps_in = [opt_fp_in]
-  log.info(f'{fps_in}')
+  log.info(f'Converting {len(fps_in)} JSON files to CSV')
  for fp_in in fps_in:
+
+    log.info(f'Processing: {Path(fp_in).name}')
+
    with open(fp_in, 'r') as fp:
      json_data = json.load(fp)
@@ -45,18 +48,22 @@ def cli(ctx, opt_fp_in, opt_dir_out):
    papers = []
    dataset_key = json_data['paper']['key']
    dataset_name = json_data['paper']['name']
-    papers_main = get_orig_paper(json_data)
-    papers += papers_main
-    papers_citations = get_citations(dataset_key, dataset_name, json_data)
-    papers += papers_citations
-    papers = [p.to_dict() for p in papers]
+    try:
+      papers_main = get_orig_paper(json_data)
+      papers += papers_main
+      papers_citations = get_citations(dataset_key, dataset_name, json_data)
+      papers += papers_citations
+      papers = [p.to_dict() for p in papers]
+    except Exception as e:
+      log.error(f'{e} on {Path(fp_in).name}')
+      continue
    # save
    if not opt_dir_out:
      # save to same directory replacing ext
      fp_out = fp_in.replace('.json','.csv')
    else:
-      fp_out = join(opt_dir_out, Path(fp_in).name)
+      fp_out = join(opt_dir_out, f'{Path(fp_in).stem}.csv')
    df_papers = pd.DataFrame.from_dict(papers)
    df_papers.index.name = 'id'
@@ -76,13 +83,13 @@ def get_citations(dataset_key, dataset_name, json_data):
    addresses = p.get('addresses', '')
    if addresses:
      for a in addresses:
-        pdf_url = '' if not p['pdf'] else p['pdf'][0]
+        pdf_url = '' if not p.get('pdf') else p.get('pdf')[0]
        paper = Paper(dataset_key, dataset_name, p['id'], p['title'], d_type,
          year, pdf_url,
          a['name'], a['type'], a['lat'], a['lng'], a['country'])
        papers.append(paper)
    else:
-      pdf_url = '' if not p['pdf'] else p['pdf'][0]
+      pdf_url = '' if not p.get('pdf') else p.get('pdf')[0]
      paper = Paper(p['key'], p['name'], d['id'], p['title'], 'main', year, pdf_url)
      papers.append(paper)
  return papers
@@ -98,13 +105,13 @@ def get_orig_paper(json_data):
    for a in addresses:
      if type(a) == str or a is None:
        continue
-      pdf_url = '' if not p['pdf'] else p['pdf'][0]
-      paper = Paper(p['key'], p['name'], p['paper_id'], p['title'], d_type, year,
+      pdf_url = '' if not p.get('pdf') else p.get('pdf')[0]
+      paper = Paper(p.get('key'), p.get('name'), p.get('paper_id'), p.get('title'), d_type, year,
        pdf_url,
-        a['name'], a['type'], a['lat'], a['lng'], a['country'])
+        a.get('name'), a.get('type'), a.get('lat'), a.get('lng'), a.get('country'))
      papers.append(paper)
  else:
-    pdf_url = '' if not p['pdf'] else p['pdf'][0]
-    paper = Paper(p['key'], p['name'], p['paper_id'], p['title'], d_type, year, pdf_url)
+    pdf_url = '' if not p.get('pdf') else p.get('pdf')[0]
+    paper = Paper(p.get('key'), p.get('name'), p.get('paper_id'), p.get('title'), d_type, year, pdf_url)
    papers.append(paper)
  return papers
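
For context, the switch to dict.get() guards against citation records that lack a 'pdf' field; a small illustration with an invented record:

    p = {'id': 'abc123', 'title': 'Example paper'}  # hypothetical record without a 'pdf' key
    # p['pdf'] would raise KeyError; p.get('pdf') returns None and falls back to ''
    pdf_url = '' if not p.get('pdf') else p.get('pdf')[0]
    print(pdf_url)  # ''
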
diff --git a/megapixels/commands/datasets/pull_spreadsheet.py b/megapixels/commands/datasets/pull_spreadsheet.py
index b8b68094..caf5eb43 100644
--- a/megapixels/commands/datasets/pull_spreadsheet.py
+++ b/megapixels/commands/datasets/pull_spreadsheet.py
@@ -21,6 +21,10 @@ from app.utils.logger_utils import Logger
log = Logger.getLogger()
opt_sheets = ['datasets', 'relationships', 'funding', 'references', 'sources', 'tags', 'citations', 'legal']
+dataset_sheet_keys = ['key', 'name_short', 'name_full', 'url', 'dl_im', 'purpose', 'funded_by',
+  'year_start', 'year_end', 'year_published', 'images', 'videos', 'identities',
+  'faces_or_persons', 'campus', 'youtube', 'flickr', 'google', 'bing', 'comment']
+
@click.command()
@click.option('-n', '--name', 'opt_spreadsheets', multiple=True,
@@ -30,11 +34,15 @@ opt_sheets = ['datasets', 'relationships', 'funding', 'references', 'sources', '
@click.option('--all', 'opt_all', is_flag=True,
  help='Get all sheets')
@click.option('-o', '--output', 'opt_fp_out', required=True,
+  type=click.Path(file_okay=False, dir_okay=True),
  help='Path to directory or filename')
+@click.option('--share', 'opt_share', required=True,
+  type=click.Choice(['nyt', 'ft']),
+  help='Share filter')
@click.option('-f', '--force', 'opt_force', is_flag=True,
  help='Force overwrite')
@click.pass_context
-def cli(ctx, opt_spreadsheets, opt_fp_out, opt_all, opt_force):
+def cli(ctx, opt_spreadsheets, opt_fp_out, opt_all, opt_share, opt_force):
  """Fetch Google spreadsheet"""
  import sys
@@ -47,6 +55,12 @@ def cli(ctx, opt_spreadsheets, opt_fp_out, opt_all, opt_force):
  for sheet_name in opt_spreadsheets:
    log.info(f'Get spreadsheet: {sheet_name}')
+    fp_out = join(opt_fp_out, f'{sheet_name}.csv')
+    fpp_out = Path(fp_out)
+    if fpp_out.exists() and not opt_force:
+      log.error(f'File "{fpp_out}" exists. Use "-f" to overwrite')
+      return
+
    sheet_data = fetch_google_sheet_objects(name=sheet_name)
    df_sheet = pd.DataFrame.from_dict(sheet_data)
    if sheet_name == 'datasets':
@@ -58,22 +72,19 @@ def cli(ctx, opt_spreadsheets, opt_fp_out, opt_all, opt_force):
      fpp_out = fpp_out.parent
    else:
      fpp_out = join(opt_fp_out, f'{sheet_name}.csv')
+    log.info(f'Writing file: {fpp_out}')
    df_sheet.to_csv(fpp_out)
def clean_datasets_sheet_ft(df):
  # clean data for FT
  df = df[df['ft_share'] == 'Y']
-  keys = ['key', 'name_short', 'name_full', 'url', 'downloaded', 'purpose', 'wild']
-  keys += ['campus', 'year_start', 'year_end', 'year_published', 'images', 'videos', 'identities', 'faces_or_persons', 'youtube', 'flickr', 'google', 'bing', 'comment']
-  return df[keys]
+  return df[dataset_sheet_keys]
def clean_datasets_sheet_nyt(df):
  # clean data for FT
  df = df[df['ft_share'] == 'Y']
-  keys = ['key', 'name_short', 'name_full', 'url', 'downloaded', 'purpose', 'wild']
-  keys += ['campus', 'year_start', 'year_end', 'year_published', 'images', 'videos', 'identities', 'faces_or_persons', 'youtube', 'flickr', 'google', 'bing', 'comment']
-  return df[keys]
+  return df[dataset_sheet_keys]
def fetch_spreadsheet():
  """Open the Google Spreadsheet, which contains the individual worksheets"""
diff --git a/megapixels/commands/processor/body_roi_video.py b/megapixels/commands/processor/body_roi_video.py
new file mode 100644
index 00000000..84bcebd2
--- /dev/null
+++ b/megapixels/commands/processor/body_roi_video.py
@@ -0,0 +1,148 @@
+"""
+Crop images to prepare for training
+"""
+
+import click
+# from PIL import Image, ImageOps, ImageFilter, ImageDraw
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+color_filters = {'color': 1, 'gray': 2, 'all': 3}
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+ help='Override enum input filename CSV')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+ help='Override enum output filename CSV')
+@click.option('--store', 'opt_data_store',
+ type=cfg.DataStoreVar,
+ default=click_utils.get_default(types.DataStore.HDD),
+ show_default=True,
+ help=click_utils.show_help(types.Dataset))
+@click.option('--size', 'opt_size',
+ type=(int, int), default=(640, 480),
+ help='Input image size')
+@click.option('-d', '--detector', 'opt_detector_type',
+ type=cfg.BodyDetectNetVar,
+ default=click_utils.get_default(types.BodyDetectNet.CVDNN),
+ help=click_utils.show_help(types.BodyDetectNet))
+@click.option('-g', '--gpu', 'opt_gpu', default=0,
+ help='GPU index')
+@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0,1),
+ help='Confidence minimum threshold')
+@click.option('-p', '--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0,4),
+ help='Number pyramids to upscale for DLIB detectors')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+ help='Slice list of files')
+@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
+ help='Display detections to debug')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+ help='Force overwrite file')
+@click.option('--color', 'opt_color_filter',
+ type=click.Choice(color_filters.keys()), default='color',
+ help='Filter to keep color or grayscale images (color = keep color')
+@click.option('--keep', 'opt_largest', type=click.Choice(['largest', 'all']), default='largest',
+ help='Only keep largest face')
+@click.option('--zone', 'opt_zone', default=(0.0, 0.0), type=(float, float),
+ help='Face center must be located within zone region (0.5 = half width/height)')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_data_store, opt_size, opt_detector_type,
+ opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display, opt_force, opt_color_filter,
+ opt_largest, opt_zone):
+ """Converts frames with faces to CSV of ROIs"""
+
+ import sys
+ import os
+ from os.path import join
+ from pathlib import Path
+ from glob import glob
+
+ from tqdm import tqdm
+ import numpy as np
+ import dlib # must keep a local reference for dlib
+ import cv2 as cv
+ import pandas as pd
+
+ from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils
+ from app.processors import person_detector
+ from app.models.data_store import DataStore
+
+ # -------------------------------------------------
+ # init here
+
+ log = logger_utils.Logger.getLogger()
+
+ opt_fp_out = opt_fp_out
+ if not opt_force and Path(opt_fp_out).exists():
+ log.error('File exists. Use "-f / --force" to overwite')
+ return
+
+ # set detector
+ if opt_detector_type == types.BodyDetectNet.CVDNN:
+ detector = person_detector.DetectorCVDNN()
+ else:
+ log.error('{} not yet implemented'.format(opt_detector_type.name))
+ return
+
+ # set largest flag, to keep all or only largest
+ opt_largest = (opt_largest == 'largest')
+
+ # process video
+ cap = cv.VideoCapture(opt_fp_in)
+
+ bboxes_all = []
+ data_out = []
+ frame_index = 0
+
+ while cap.isOpened():
+ # get video frame
+ readable, im = cap.read()
+ if not readable:
+ break
+
+ im_resized = im_utils.resize(im, width=opt_size[0], height=opt_size[1])
+
+ try:
+ bboxes_norm = detector.detect(im_resized, pyramids=opt_pyramids, largest=opt_largest,
+ zone=opt_zone, conf=opt_conf_thresh, blob_size=opt_size)
+ except Exception as e:
+ log.error('could not detect: {}'.format(frame_index))
+ log.error('{}'.format(e))
+ continue
+
+ for bbox in bboxes_norm:
+ roi = {
+ 'record_index': frame_index,
+ 'x': bbox.x,
+ 'y': bbox.y,
+ 'w': bbox.w,
+ 'h': bbox.h
+ }
+ data_out.append(roi)
+
+ if opt_display and len(bboxes_norm):
+ # draw each box
+ for bbox_norm in bboxes_norm:
+ dim = im_resized.shape[:2][::-1]
+ bbox_dim = bbox.to_dim(dim)
+ # if dim[0] > 1000:
+ # im_resized = im_utils.resize(im_resized, width=1000)
+ im_resized = draw_utils.draw_bbox(im_resized, bbox_norm)
+
+ # display and wait
+ cv.imshow('', im_resized)
+ display_utils.handle_keyboard_video()
+
+ frame_index += 1
+
+
+ # create DataFrame and save to CSV
+ file_utils.mkdirs(opt_fp_out)
+ df = pd.DataFrame.from_dict(data_out)
+ df.index.name = 'index'
+ df.to_csv(opt_fp_out)
+
+ # save script
+ file_utils.write_text(' '.join(sys.argv), '{}.sh'.format(opt_fp_out)) \ No newline at end of file
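
A hypothetical end-to-end run (the CLI entry point, file names, and flag values are assumptions, not part of this commit): invoke the new processor command on a video, then reload the ROI CSV it writes:

    # e.g. via the project's click CLI:
    #   ... processor body_roi_video -i video.mp4 -o rois.csv --conf 0.9 --keep largest

    import pandas as pd

    df = pd.read_csv('rois.csv', index_col='index')
    # one row per detected person, with normalized x, y, w, h per frame
    print(df.groupby('record_index').size().describe())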