change to cli_proc

author: adamhrv <adam@ahprojects.com> 2019-01-18 11:00:18 +0100
committer: adamhrv <adam@ahprojects.com> 2019-01-18 11:00:18 +0100
commit: e06af50389f849be0bfe4fa97d39f4519ef2c711 (patch)
tree: 49755b51e1b8b1f8031e5483333570a8e9951272 /megapixels/commands
parent: 03ad11fb2a3dcd425d50167b15d72d4e0ef536a2 (diff)
20 files changed, 240 insertions, 7 deletions
diff --git a/megapixels/commands/datasets/preproc_wiki_imdb.py b/megapixels/commands/datasets/preproc_wiki_imdb.py
new file mode 100644
index 00000000..66680ed0
--- /dev/null
+++ b/megapixels/commands/datasets/preproc_wiki_imdb.py
@@ -0,0 +1,205 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import dlib
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+log = logger_utils.Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out',
+  help='Output directory')
+@click.option('--videos', 'opt_dir_videos',
+  help='Videos directory')
+@click.option('--action', 'opt_action',
+  type=click.Choice(['info', 'faces', 'rename', 'download', 'metadata', 'split_frames']),
+  default='info',
+  help='Command action')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_videos, opt_action):
+  """YTMU utils"""
+
+  # -------------------------------------------------
+  # process: dispatch --action to its handler
+
+  if opt_action == 'metadata':
+    # downloads video metadata with ytdl
+    handle_metadata(opt_fp_in, opt_fp_out)
+  elif opt_action == 'download':
+    # downloads video files with ytdl
+    handle_download(opt_fp_in, opt_fp_out)
+  elif opt_action == 'info':
+    # converts original data file to clean CSV
+    handle_info(opt_fp_in, opt_fp_out)
+  elif opt_action == 'rename':
+    # rename the videos to video ID
+    handle_rename(opt_fp_in, opt_fp_out, opt_dir_videos)
+  elif opt_action == 'split_frames':
+    # save the video frames that contain a face
+    handle_split_frames(opt_fp_in, opt_fp_out, opt_dir_videos)
+  else:
+    # 'faces' is an accepted --action choice but has no handler in this file
+    log.error('action not implemented: {}'.format(opt_action))
+  
+
+
+
+# ----------------------------------------------------
+# handlers
+
+def handle_split_frames(fp_in, dir_out, dir_videos):
+  """Save to dir_out the frames of each video in dir_videos where a face is detected (fp_in unused)."""
+  if not dir_out or not dir_videos:
+    log.error('-o/--output and --videos required')
+    return
+  import cv2 as cv
+  from tqdm import tqdm
+  from app.processors import face_detector
+  detector = face_detector.DetectorDLIBCNN()
+
+  # get file list
+  fp_videos = glob(join(dir_videos, '*.mp4'))
+  fp_videos += glob(join(dir_videos, '*.webm'))
+  fp_videos += glob(join(dir_videos, '*.mkv'))
+  face_interval = 30  # minimum number of frames between two saved frames
+  frame_interval_count = 0  # NOTE: both counters run on across videos (not reset per file)
+  frame_count = 0
+  file_utils.mkdirs(dir_out)
+
+  for fp_video in tqdm(fp_videos):
+    video = cv.VideoCapture(fp_video)
+    while video.isOpened():
+      res, frame = video.read()
+      if not res:
+        break
+      frame_count += 1  # for naming
+      frame_interval_count += 1  # for interval
+      if frame_interval_count < face_interval:
+        continue  # a save is not allowed yet, so skip the costly CNN detection
+      bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
+      if len(bboxes) > 0:
+        fp_frame = join(dir_out, '{}_{}.jpg'.format(Path(fp_video).stem, file_utils.zpad(frame_count)))
+        cv.imwrite(fp_frame, frame)
+        frame_interval_count = 0
+    video.release()  # free the capture handle before opening the next video
+
+
+def handle_metadata(fp_in, fp_out):
+  """Look up YouTube metadata for each 'id' row of the fp_in CSV and write the results to the fp_out CSV."""
+  if not fp_out:
+    log.error('--output required')
+    return
+  keys = ['description', 'average_rating', 'dislike_count', 'categories', 
+  'thumbnail', 'title', 'upload_date', 'uploader_url', 'uploader_id',
+  'fps', 'height', 'width', 'like_count', 'license', 'tags']
+  import youtube_dl
+  ydl = youtube_dl.YoutubeDL({'outtmpl': '%(id)s.%(ext)s'})
+  df = pd.read_csv(fp_in)
+  data_exp = []
+
+  for i, row in df.iterrows():
+    video_data = {'url': row['url'], 'id': row['id']}
+    try:
+      with ydl:
+        url = 'http://www.youtube.com/watch?v={}'.format(row['id'])
+        result = ydl.extract_info(url, download=False)
+      video = result['entries'][0] if 'entries' in result else result
+      for k in keys:
+        val = video.get(k)  # a missing field must not discard the whole video
+        if k == 'title':
+          log.debug(val)
+        if isinstance(val, list):
+          val = '; '.join(val)
+        if isinstance(val, str):
+          val = val.replace(',', ';')  # keep commas out of the text cells
+        if val is not None:
+          video_data[k] = val  # non-string values (fps, height, counts) are stored as-is
+    except Exception as e:
+      log.warn('video unavailable: {}'.format(row['url']))
+      log.error(e)
+      continue
+    data_exp.append(video_data)
+  df_exp = pd.DataFrame.from_dict(data_exp)
+  df_exp.to_csv(fp_out)
+
+
+def handle_download(fp_in, dir_out):
+  """Download each video 'id' of the fp_in CSV into dir_out, skipping ids already found there."""
+  if not dir_out:
+    log.error('--output required')
+    return
+  import youtube_dl
+  df = pd.read_csv(fp_in)
+  fp_videos = glob(join(dir_out, '*.mp4'))
+  fp_videos += glob(join(dir_out, '*.webm'))
+  fp_videos += glob(join(dir_out, '*.mkv'))
+  # write <id>.<ext> into dir_out so the globs above find the files on a re-run
+  ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '%(id)s.%(ext)s')})
+  for i, row in df.iterrows():
+    vid = row['id']
+    if any(vid in fp_video for fp_video in fp_videos):
+      log.debug('skip: {}'.format(vid))
+      continue
+    try:
+      with ydl:
+        ydl.download(['http://www.youtube.com/watch?v={}'.format(vid)])
+    except Exception as e:
+      log.error('could not dl: {}: {}'.format(vid, e))
+
+
+def handle_info(fp_in, fp_out):
+  """Parse the video id out of each URL loaded from fp_in and write url/id rows to the fp_out CSV."""
+  if not fp_out:
+    log.error('--output required')
+    return
+  urls = file_utils.load_text(fp_in)
+  videos = []
+  for url in urls:
+    try:
+      vid = url.split('v=')[1].split('&')[0]
+    except IndexError:
+      log.warn('no video id for {}'.format(url))
+      continue
+    videos.append({'url': url, 'id': vid})
+  # write to the fp_out parameter; opt_fp_out is not defined in this function
+  df = pd.DataFrame.from_dict(videos)
+  df.to_csv(fp_out)
+
+  
+def handle_rename(fp_in, fp_out, dir_videos):
+  """Rename the first file in dir_videos whose path contains each 'id' of the fp_in CSV to <id><suffix>."""
+  import shutil
+
+  if not dir_videos:
+    log.error('--videos required')
+    return
+
+  fp_videos = glob(join(dir_videos, '*.mp4'))
+  fp_videos += glob(join(dir_videos, '*.webm'))
+  fp_videos += glob(join(dir_videos, '*.mkv'))
+  df = pd.read_csv(fp_in)
+
+  for i, row in df.iterrows():
+    vid = row['id']
+    # first remaining file whose path contains the id; None when nothing matches
+    fp_video = next((fp for fp in fp_videos if vid in fp), None)
+    if fp_video is None:
+      continue
+    dst = join(dir_videos, '{}{}'.format(vid, Path(fp_video).suffix))
+    shutil.move(fp_video, dst)
+    log.debug('move {} to {}'.format(fp_video, dst))
+    fp_videos.remove(fp_video)  # a file is renamed at most once
+\ No newline at end of file
diff --git a/megapixels/commands/demo/face_search.py b/megapixels/commands/demo/face_search.py
index f551cafd..4c7036f4 100644
--- a/megapixels/commands/demo/face_search.py
+++ b/megapixels/commands/demo/face_search.py
@@ -53,7 +53,7 @@ def cli(ctx, opt_fp_in, opt_data_store, opt_dataset, opt_results, opt_gpu):
   dataset.load_metadata(types.Metadata.FILE_RECORD)
   dataset.load_metadata(types.Metadata.FACE_VECTOR)
   dataset.load_metadata(types.Metadata.FACE_ROI)
-  # dataset.load_metadata(types.Metadata.IDENTITY)
+  dataset.load_metadata(types.Metadata.IDENTITY)
 
   # init face detection
   detector = face_detector.DetectorCVDNN()
@@ -82,22 +82,50 @@ def cli(ctx, opt_fp_in, opt_data_store, opt_dataset, opt_results, opt_gpu):
   image_records = dataset.find_matches(vec_query, n_results=opt_results)
 
   # summary
-  im_query = draw_utils.draw_bbox(im_query, bbox_norm, stroke_weight=8)
+  im_query = draw_utils.draw_bbox(im_query, bbox_norm, stroke_weight=4)
   ims_match = [im_query]
+
+  opt_size = (256,256)
+
   for image_record in image_records:
     image_record.summarize()
     log.info(f'{image_record.filepath}')
     im_match = cv.imread(image_record.filepath)
+    dim_match = im_match.shape[:2][::-1]
+    bbox_match = image_record.bbox
+    score = image_record.score
+    if score < .5:
+      clr = (0,255,0)
+    elif score < .6:
+      clr = (0,255,125)
+    elif score < .65:
+      clr = (0,125,125)
+    elif score < .7:
+      clr = (0,125,255)
+    else:
+      clr = (0,0,255)
+      
+    im_match = draw_utils.draw_bbox(im_match, bbox_match, stroke_weight=4, color=clr )
+    bbox_match_dim = bbox_match.to_dim(dim_match)
 
-    im_match_pil = Image.open(image_record.filepath).convert('RGB')
-    # bbox = 
-    ims_match.append(im_match)
+    im_pil = im_utils.ensure_pil(im_match)
+    center = (bbox_match_dim.cx, bbox_match_dim.cy)
+    im_pil = ImageOps.fit(im_pil, opt_size, centering=center)
+    im_np = im_utils.ensure_np(im_pil)
+    if image_record.identity is not None:
+      log.debug(f'identity: {image_record.identity.name_display}')
+    else:
+      log.debug('no identity info')
+    log.debug(f'score: {image_record.score}')
 
+    ims_match.append(im_np)
   # make montages of most similar faces
   montages = imutils.build_montages(ims_match, (256, 256), (3,2))
 
   # display 
   for i, montage in enumerate(montages):
     cv.imshow(f'{opt_dataset.name.upper()}: page {i}', montage)
+    fp_out = join(Path(opt_fp_in).parent, f'{Path(opt_fp_in).stem}_{i}.png')
+    cv.imwrite(fp_out, montage)
   
   display_utils.handle_keyboard()
diff --git a/megapixels/commands/cv/_old_files_to_face_rois.py b/megapixels/commands/processor/_old_files_to_face_rois.py
index d92cbd74..d92cbd74 100644
--- a/megapixels/commands/cv/_old_files_to_face_rois.py
+++ b/megapixels/commands/processor/_old_files_to_face_rois.py
diff --git a/megapixels/commands/cv/cluster.py b/megapixels/commands/processor/cluster.py
index 419091a0..419091a0 100644
--- a/megapixels/commands/cv/cluster.py
+++ b/megapixels/commands/processor/cluster.py
diff --git a/megapixels/commands/cv/crop.py b/megapixels/commands/processor/crop.py
index 778be0c4..778be0c4 100644
--- a/megapixels/commands/cv/crop.py
+++ b/megapixels/commands/processor/crop.py
diff --git a/megapixels/commands/cv/csv_to_faces.py b/megapixels/commands/processor/csv_to_faces.py
index 64c8b965..64c8b965 100644
--- a/megapixels/commands/cv/csv_to_faces.py
+++ b/megapixels/commands/processor/csv_to_faces.py
diff --git a/megapixels/commands/cv/csv_to_faces_mt.py b/megapixels/commands/processor/csv_to_faces_mt.py
index 64c8b965..64c8b965 100644
--- a/megapixels/commands/cv/csv_to_faces_mt.py
+++ b/megapixels/commands/processor/csv_to_faces_mt.py
diff --git a/megapixels/commands/cv/face_3ddfa.py b/megapixels/commands/processor/face_3ddfa.py
index ffc74180..ffc74180 100644
--- a/megapixels/commands/cv/face_3ddfa.py
+++ b/megapixels/commands/processor/face_3ddfa.py
diff --git a/megapixels/commands/cv/face_attributes.py b/megapixels/commands/processor/face_attributes.py
index 01fe3bd1..01fe3bd1 100644
--- a/megapixels/commands/cv/face_attributes.py
+++ b/megapixels/commands/processor/face_attributes.py
diff --git a/megapixels/commands/cv/face_frames.py b/megapixels/commands/processor/face_frames.py
index 76f23af1..76f23af1 100644
--- a/megapixels/commands/cv/face_frames.py
+++ b/megapixels/commands/processor/face_frames.py
diff --git a/megapixels/commands/cv/face_landmark_2d_5.py b/megapixels/commands/processor/face_landmark_2d_5.py
index 40ec6f41..40ec6f41 100644
--- a/megapixels/commands/cv/face_landmark_2d_5.py
+++ b/megapixels/commands/processor/face_landmark_2d_5.py
diff --git a/megapixels/commands/cv/face_landmark_2d_68.py b/megapixels/commands/processor/face_landmark_2d_68.py
index c6978a40..c6978a40 100644
--- a/megapixels/commands/cv/face_landmark_2d_68.py
+++ b/megapixels/commands/processor/face_landmark_2d_68.py
diff --git a/megapixels/commands/cv/face_landmark_3d_68.py b/megapixels/commands/processor/face_landmark_3d_68.py
index a2d14d72..a2d14d72 100644
--- a/megapixels/commands/cv/face_landmark_3d_68.py
+++ b/megapixels/commands/processor/face_landmark_3d_68.py
diff --git a/megapixels/commands/cv/face_pose.py b/megapixels/commands/processor/face_pose.py
index cb7ec56c..cb7ec56c 100644
--- a/megapixels/commands/cv/face_pose.py
+++ b/megapixels/commands/processor/face_pose.py
diff --git a/megapixels/commands/cv/face_roi.py b/megapixels/commands/processor/face_roi.py
index e83b0f61..fc933049 100644
--- a/megapixels/commands/cv/face_roi.py
+++ b/megapixels/commands/processor/face_roi.py
@@ -48,9 +48,9 @@ color_filters = {'color': 1, 'gray': 2, 'all': 3}
 @click.option('-f', '--force', 'opt_force', is_flag=True,
   help='Force overwrite file')
 @click.option('--color', 'opt_color_filter', 
-  type=click.Choice(color_filters.keys()), default='all',
+  type=click.Choice(color_filters.keys()), default='color',
   help='Filter to keep color or grayscale images (color = keep color')
-@click.option('--keep', 'opt_largest', type=click.Choice(['largest', 'all']), default='all',
+@click.option('--keep', 'opt_largest', type=click.Choice(['largest', 'all']), default='largest',
   help='Only keep largest face')
 @click.option('--zone', 'opt_zone', default=(0.0, 0.0), type=(float, float), 
   help='Face center must be located within zone region (0.5 = half width/height)')
diff --git a/megapixels/commands/cv/face_vector.py b/megapixels/commands/processor/face_vector.py
index cb155d08..cb155d08 100644
--- a/megapixels/commands/cv/face_vector.py
+++ b/megapixels/commands/processor/face_vector.py
diff --git a/megapixels/commands/cv/mirror.py b/megapixels/commands/processor/mirror.py
index 9ca1cac7..9ca1cac7 100644
--- a/megapixels/commands/cv/mirror.py
+++ b/megapixels/commands/processor/mirror.py
diff --git a/megapixels/commands/cv/resize.py b/megapixels/commands/processor/resize.py
index 7409ee6f..7409ee6f 100644
--- a/megapixels/commands/cv/resize.py
+++ b/megapixels/commands/processor/resize.py
diff --git a/megapixels/commands/cv/resize_dataset.py b/megapixels/commands/processor/resize_dataset.py
index 3a6ec15f..3a6ec15f 100644
--- a/megapixels/commands/cv/resize_dataset.py
+++ b/megapixels/commands/processor/resize_dataset.py
diff --git a/megapixels/commands/cv/videos_to_frames.py b/megapixels/commands/processor/videos_to_frames.py
index 0b56c46a..0b56c46a 100644
--- a/megapixels/commands/cv/videos_to_frames.py
+++ b/megapixels/commands/processor/videos_to_frames.py
author	adamhrv <adam@ahprojects.com>	2019-01-18 11:00:18 +0100
committer	adamhrv <adam@ahprojects.com>	2019-01-18 11:00:18 +0100
commit	e06af50389f849be0bfe4fa97d39f4519ef2c711 (patch)
tree	49755b51e1b8b1f8031e5483333570a8e9951272 /megapixels/commands
parent	03ad11fb2a3dcd425d50167b15d72d4e0ef536a2 (diff)