summaryrefslogtreecommitdiff
path: root/megapixels/commands/datasets/50people.py
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2018-12-05 12:00:15 +0100
committeradamhrv <adam@ahprojects.com>2018-12-05 12:00:15 +0100
commit90abf459d1df1f21960c1d653a1f936d1ec30256 (patch)
treefacab8e9bac6c56e69c369c2140cdbea218a01df /megapixels/commands/datasets/50people.py
parent0529d4cd1618016319e995c37aa118bf8c2d501b (diff)
.
Diffstat (limited to 'megapixels/commands/datasets/50people.py')
-rw-r--r--megapixels/commands/datasets/50people.py129
1 files changed, 129 insertions, 0 deletions
diff --git a/megapixels/commands/datasets/50people.py b/megapixels/commands/datasets/50people.py
new file mode 100644
index 00000000..fb35b2fe
--- /dev/null
+++ b/megapixels/commands/datasets/50people.py
@@ -0,0 +1,129 @@
+from glob import glob
+import os
+from os.path import join
+from pathlib import Path
+
+import click
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+
+import dlib
+import pandas as pd
+from PIL import Image, ImageOps, ImageFilter
+from app.utils import file_utils, im_utils
+
+
+log = logger_utils.Logger.getLogger()
+
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input CSV file listing the videos (youtube_id / vimeo_id columns)')
@click.option('-o', '--output', 'opt_fp_out',
    help='Output directory')
@click.option('--media', 'opt_dir_media',
    help='Directory containing the downloaded video files')
@click.option('--action', 'opt_action',
    type=click.Choice(['info', 'download', 'face_frames']),
    default='info',
    help='Command action')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
    help='Slice list of files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_media, opt_action, opt_slice):
    """Download dataset videos or extract face frames from them.

    Actions: "download" fetches every video listed in the input CSV into the
    output directory; "face_frames" saves frames containing faces from the
    videos in the --media directory into the output directory; "info" (the
    default) performs no work.
    """
    # `ctx` is injected by click.pass_context and is not used here.

    if opt_action == 'download':
        # downloads video files with youtube-dl
        handle_download(opt_fp_in, opt_fp_out, opt_slice)
    elif opt_action == 'face_frames':
        # handle_face_frames takes no slice argument, so --slice only applies
        # to the download action
        handle_face_frames(opt_fp_in, opt_fp_out, opt_dir_media)
    # 'info' (the default) intentionally falls through without doing anything
+
+
+
+
+
def handle_face_frames(fp_in, dir_out, dir_videos):
    """Save video frames that contain at least one detected face.

    Every ``*.mp4``, ``*.webm`` and ``*.mkv`` file directly inside
    ``dir_videos`` is read frame by frame. A frame is written to ``dir_out``
    as ``<video stem>_<zero-padded frame counter>.jpg`` when at least
    ``face_interval`` (30) frames have been read since the last saved frame
    and the face detector returns one or more boxes for it.

    Args:
        fp_in: Unused; kept so the signature stays compatible with callers.
        dir_out: Directory the JPEG frames are written to.
        dir_videos: Directory searched for video files.

    Returns:
        None. Logs an error and returns early when ``dir_out`` or
        ``dir_videos`` is missing.
    """
    if not dir_out or not dir_videos:
        log.error('-o/--output and --media required')
        return

    # heavy dependencies are imported lazily so other actions start quickly
    import cv2 as cv
    from tqdm import tqdm
    from app.processors import face_detector
    detector = face_detector.DetectorDLIBCNN()

    # get file list
    fp_videos = []
    for ext in ('mp4', 'webm', 'mkv'):
        fp_videos += glob(join(dir_videos, '*.{}'.format(ext)))

    face_interval = 30  # minimum number of frames between two saved frames
    # NOTE(review): both counters keep running across videos (they are not
    # reset per file), exactly as before - confirm this is intended.
    frame_interval_count = 0
    frame_count = 0

    file_utils.mkdirs(dir_out)

    for fp_video in tqdm(fp_videos):
        stem = Path(fp_video).stem
        video = cv.VideoCapture(fp_video)
        try:
            while video.isOpened():
                res, frame = video.read()
                if not res:
                    break

                frame_count += 1  # for naming
                frame_interval_count += 1  # for interval
                if frame_interval_count < face_interval:
                    # a frame cannot be saved yet, so skip the costly detection
                    continue

                bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
                if len(bboxes) > 0:
                    # save frame
                    fp_frame = join(dir_out, '{}_{}.jpg'.format(stem, file_utils.zpad(frame_count)))
                    cv.imwrite(fp_frame, frame)
                    frame_interval_count = 0
        finally:
            # always free the decoder/file handle, even if detection raises
            video.release()
+
+
def handle_download(fp_in, dir_out, opt_slice):
    """Download the videos listed in a CSV file with youtube-dl.

    Each row supplies a ``youtube_id`` or, when that is empty, a ``vimeo_id``.
    Files are saved as ``<dir_out>/<video id>.<ext>``. A video is skipped when
    a ``.mp4``, ``.webm`` or ``.mkv`` file named after its id already exists
    in ``dir_out``. A failed download is logged and the loop continues with
    the next row.

    Args:
        fp_in: Path to the CSV file; the columns ``youtube_id``, ``vimeo_id``
            and ``city`` are read.
        dir_out: Directory the videos are downloaded to.
        opt_slice: ``(start, stop)`` pair selecting a range of CSV rows;
            ``None`` entries leave that end open. A falsy value disables
            slicing.

    Returns:
        None. Logs an error and returns early when ``dir_out`` is missing.
    """
    if not dir_out:
        # join(None, ...) below would otherwise raise TypeError
        log.error('-o/--output required')
        return

    import youtube_dl
    df = pd.read_csv(fp_in)
    if opt_slice:
        df = df[opt_slice[0]:opt_slice[1]]
    df = df.fillna('')  # missing ids become '' so they test as falsy

    # The output template is '<id>.<ext>', so an existing file's stem is its
    # video id. Exact matching avoids false hits on ids that merely occur
    # somewhere inside another path.
    downloaded_ids = set()
    for ext in ('mp4', 'webm', 'mkv'):
        for fp_video in glob(join(dir_out, '*.{}'.format(ext))):
            downloaded_ids.add(Path(fp_video).stem)

    ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '') + '%(id)s.%(ext)s'})

    for _, row in df.iterrows():
        vid = str(row['youtube_id'])
        if vid:
            url = 'https://youtube.com/watch?v={}'.format(vid)
        else:
            vid = row['vimeo_id']
            if not vid:
                log.warn('no video id: {} for {}'.format(vid, row['city']))
                continue
            # pandas reads an id column containing blanks as float
            vid = str(int(vid))
            url = 'https://vimeo.com/{}'.format(vid)

        if vid in downloaded_ids:
            continue

        try:
            with ydl:
                ydl.download([url])
        except Exception as e:
            # best effort: report and move on, but let Ctrl-C / SystemExit
            # propagate instead of being swallowed
            log.error('could not dl: {} ({})'.format(vid, e))
        else:
            downloaded_ids.add(vid)