diff options
Diffstat (limited to 'megapixels/commands/datasets/50people.py')
| -rw-r--r-- | megapixels/commands/datasets/50people.py | 129 |
1 files changed, 129 insertions, 0 deletions
"""Command-line utilities for the 50people dataset.

Provides two actions:

* ``download``    -- fetch the videos listed in a CSV file with youtube_dl
* ``face_frames`` -- save still frames that contain at least one detected face
"""

from glob import glob
import os
from os.path import join
from pathlib import Path

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils import logger_utils

import dlib
import pandas as pd
from PIL import Image, ImageOps, ImageFilter
from app.utils import file_utils, im_utils


log = logger_utils.Logger.getLogger()

# Video container extensions searched for in the media/output directories
VIDEO_EXTS = ('mp4', 'webm', 'mkv')


def _glob_videos(dir_videos):
    """Return paths of all video files (see VIDEO_EXTS) directly inside dir_videos.

    Results are grouped by extension in VIDEO_EXTS order and sorted within each
    group so that slicing the list is reproducible between runs.
    """
    fp_videos = []
    for ext in VIDEO_EXTS:
        fp_videos += sorted(glob(join(dir_videos, '*.{}'.format(ext))))
    return fp_videos


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input CSV file listing the videos')
@click.option('-o', '--output', 'opt_fp_out',
    help='Output directory')
@click.option('--media', 'opt_dir_media',
    help='Directory containing the downloaded video files')
@click.option('--action', 'opt_action',
    type=click.Choice(['download', 'face_frames']),
    required=True,
    help='Command action')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
    help='Slice list of files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_media, opt_action, opt_slice):
    """50people dataset utils"""

    # -------------------------------------------------
    # process

    if opt_action == 'download':
        # downloads video files with ytdl
        handle_download(opt_fp_in, opt_fp_out, opt_slice)
    elif opt_action == 'face_frames':
        # saves frames containing faces from the downloaded videos
        handle_face_frames(opt_fp_in, opt_fp_out, opt_dir_media, opt_slice)


def handle_face_frames(fp_in, dir_out, dir_videos, opt_slice=None):
    """Save frames that contain at least one detected face from each video.

    :param fp_in: unused; kept so both handlers share a leading signature
    :param dir_out: directory the JPEG frames are written to (created if needed)
    :param dir_videos: directory searched for *.mp4, *.webm and *.mkv files
    :param opt_slice: optional (start, end) pair used to slice the video list

    Within a video, a frame is only saved when at least ``face_interval``
    frames have been read since the previous saved frame. Frame counters are
    per video, so the number in the file name is the frame's 1-based position
    within its own video.
    """
    if not dir_out or not dir_videos:
        log.error('-o/--output and --media required')
        return

    # heavy dependencies are imported lazily so other actions do not pay for them
    import cv2 as cv
    from tqdm import tqdm
    from app.processors import face_detector
    detector = face_detector.DetectorDLIBCNN()

    # get file list
    fp_videos = _glob_videos(dir_videos)
    if opt_slice:
        fp_videos = fp_videos[opt_slice[0]:opt_slice[1]]

    face_interval = 30  # minimum number of frames between two saved frames

    file_utils.mkdirs(dir_out)

    for fp_video in tqdm(fp_videos):
        # log.debug('opening: {}'.format(fp_video))
        video_stem = Path(fp_video).stem
        frame_count = 0  # for naming
        frame_interval_count = 0  # for interval
        video = cv.VideoCapture(fp_video)
        try:
            while video.isOpened():
                res, frame = video.read()
                if not res:
                    break

                frame_count += 1
                frame_interval_count += 1
                if frame_interval_count < face_interval:
                    # too soon after the last saved frame: skip the costly detection
                    continue

                bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
                if len(bboxes) > 0:
                    # save frame
                    fn_frame = '{}_{}.jpg'.format(video_stem, file_utils.zpad(frame_count))
                    cv.imwrite(join(dir_out, fn_frame), frame)
                    frame_interval_count = 0
        finally:
            # always free the decoder/file handle, even if detection raises
            video.release()


def handle_download(fp_in, dir_out, opt_slice):
    """Download the videos listed in a CSV file with youtube_dl.

    :param fp_in: path to a CSV file with 'youtube_id', 'vimeo_id' and 'city' columns
    :param dir_out: directory the videos are saved to as '<id>.<ext>'
    :param opt_slice: (start, end) pair used to slice the CSV rows

    For each row the YouTube ID is used when present, otherwise the Vimeo ID.
    Rows with neither are logged and skipped, as are videos whose file already
    exists in dir_out. A failed download is logged and does not stop the run.
    """
    if not dir_out:
        log.error('-o/--output required')
        return

    import youtube_dl
    df = pd.read_csv(fp_in)
    if opt_slice:
        df = df[opt_slice[0]:opt_slice[1]]
    df = df.fillna('')

    # IDs already on disk; files are written as '<id>.<ext>' so the stem is the ID
    downloaded_ids = {Path(fp_video).stem for fp_video in _glob_videos(dir_out)}

    ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '') + '%(id)s.%(ext)s'})

    for _, row in df.iterrows():
        vid = str(row['youtube_id'])
        if vid:
            url = 'https://youtube.com/watch?v={}'.format(vid)
        else:
            vid = row['vimeo_id']
            if vid:
                # int() drops a float suffix such as 12345.0 before formatting
                vid = str(int(vid))
                url = 'https://vimeo.com/{}'.format(vid)
        if not vid:
            log.warn('no video id: {} for {}'.format(vid, row['city']))
            continue

        if vid in downloaded_ids:
            # log.debug('skip: {}'.format(vid))
            continue

        try:
            with ydl:
                ydl.download([url])
        except Exception as e:
            # best effort: report the failure and continue with the next video
            log.error('could not dl: {} ({})'.format(vid, e))
        else:
            downloaded_ids.add(vid)
