1 files changed, 129 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/50people.py b/megapixels/commands/datasets/50people.py
new file mode 100644
index 00000000..fb35b2fe
--- /dev/null
+++ b/megapixels/commands/datasets/50people.py
@@ -0,0 +1,129 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import dlib
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+log = logger_utils.Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input directory')
+@click.option('-o', '--output', 'opt_fp_out',
+  help='Output directory')
+@click.option('--media', 'opt_dir_media',
+  help='Output directory')
+@click.option('--action', 'opt_action', 
+  type=click.Choice(['download']),
+  default='info',
+  help='Command action')
+@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
+  help='Slice list of files')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_media, opt_action, opt_slice):
+  """YTMU utils"""
+
+  
+  from tqdm import tqdm
+
+  # -------------------------------------------------
+  # process  
+ 
+  if opt_action == 'download':
+    # downloads video files with ytdl
+    handle_download(opt_fp_in, opt_fp_out, opt_slice)
+  elif opt_action == 'face_frames':
+    handle_face_frames(opt_fp_in, opt_fp_out, dir_media, opt_slice)
+
+
+
+
+
+def handle_face_frames(fp_in, dir_out, dir_videos):
+  if not dir_out or not dir_videos:
+    log.error('-o/--output and --videos required')
+    return
+  
+  import cv2 as cv
+  from tqdm import tqdm
+  from app.processors import face_detector
+  detector = face_detector.DetectorDLIBCNN()
+
+  # get file list
+  fp_videos = glob(join(dir_videos, '*.mp4'))
+  fp_videos += glob(join(dir_videos, '*.webm'))
+  fp_videos += glob(join(dir_videos, '*.mkv'))
+
+  face_interval = 30
+  frame_interval_count = 0
+  frame_count = 0
+
+  file_utils.mkdirs(dir_out)
+
+  for fp_video in tqdm(fp_videos):
+    # log.debug('opening: {}'.format(fp_video))
+    video = cv.VideoCapture(fp_video)
+    while video.isOpened():
+      res, frame = video.read()
+      if not res:
+        break
+
+      frame_count += 1  # for naming
+      frame_interval_count += 1  # for interval
+      bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
+      if len(bboxes) > 0 and frame_interval_count >= face_interval:
+        # save frame
+        fp_frame = join(dir_out, '{}_{}.jpg'.format(Path(fp_video).stem, file_utils.zpad(frame_count)))
+        cv.imwrite(fp_frame, frame)
+        frame_interval_count = 0
+
+
+def handle_download(fp_in, dir_out, opt_slice):
+  import youtube_dl
+  df = pd.read_csv(fp_in)
+  if opt_slice:
+    df = df[opt_slice[0]:opt_slice[1]]
+  df = df.fillna('')
+  fp_videos = glob(join(dir_out, '*.mp4'))
+  fp_videos += glob(join(dir_out, '*.webm'))
+  fp_videos += glob(join(dir_out, '*.mkv'))
+  
+  ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '') + '%(id)s.%(ext)s'})
+
+  for i, row in df.iterrows():
+    vid = str(row['youtube_id'])
+    if not vid:
+      vid = row['vimeo_id']
+      if vid:
+        vid = str(int(vid))
+        url = 'https://vimeo.com/{}'.format(vid)
+    else:
+      url = 'https://youtube.com/watch?v={}'.format(vid)
+    if not vid:
+      log.warn('no video id: {} for {}'.format(vid, row['city']))
+      continue
+    
+    found = False
+    for fp_video in fp_videos:
+      if vid in fp_video:
+        #log.debug('skip: {}'.format(vid))
+        found = True
+    
+    if not found:
+      try:
+        with ydl:
+          ydl.download([url])
+      except:
+        log.error('could not dl: {}'.format(vid))