from glob import glob
import os
from os.path import join
from pathlib import Path

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils import logger_utils

import dlib
import pandas as pd
from PIL import Image, ImageOps, ImageFilter
from app.utils import file_utils, im_utils

log = logger_utils.Logger.getLogger()

# Video container extensions searched for in the media/output directories.
_VIDEO_EXTS = ('mp4', 'webm', 'mkv')


def _glob_videos(dir_videos):
    """Return the paths of all video files found directly inside *dir_videos*."""
    fp_videos = []
    for ext in _VIDEO_EXTS:
        fp_videos += glob(join(dir_videos, '*.{}'.format(ext)))
    return fp_videos


# Command-line entry point. The docstring below is shown by click as the
# command's help text, so it is kept short; details live in these comments.
#   --action download     read the CSV given by -i and download each listed
#                         video into the -o directory
#   --action face_frames  scan the videos in --media and write frames that
#                         contain a detected face into the -o directory
#   --action info         default; no handler is attached to it
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Input CSV file (used by the download action)')
@click.option('-o', '--output', 'opt_fp_out',
              help='Output directory')
@click.option('--media', 'opt_dir_media',
              help='Directory of video files (used by the face_frames action)')
@click.option('--action', 'opt_action',
              type=click.Choice(['info', 'download', 'face_frames']),
              default='info',
              help='Command action')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
              help='Slice list of files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_media, opt_action, opt_slice):
    """YTMU utils"""

    # -------------------------------------------------
    # process

    if opt_action == 'download':
        # downloads video files with ytdl
        handle_download(opt_fp_in, opt_fp_out, opt_slice)
    elif opt_action == 'face_frames':
        handle_face_frames(opt_fp_in, opt_fp_out, opt_dir_media, opt_slice)
    else:
        # 'info' has no handler; say so instead of exiting silently
        log.info('no processing for action: {}'.format(opt_action))


def handle_face_frames(fp_in, dir_out, dir_videos, opt_slice=None):
    """Save video frames that contain at least one detected face.

    Every video found in *dir_videos* is read frame by frame. Once at least
    ``face_interval`` frames have passed since the last saved frame, the face
    detector is run on the current frame and, if it reports any bounding box,
    the frame is written to *dir_out* as ``<video stem>_<frame number>.jpg``.

    :param fp_in: unused here; accepted so both handlers share a call shape
    :param dir_out: directory the JPEG frames are written to (created if needed)
    :param dir_videos: directory containing the .mp4/.webm/.mkv files to scan
    :param opt_slice: optional ``(start, stop)`` pair applied to the sorted
        list of video files; ``None`` bounds leave that side open
    """
    if not dir_out or not dir_videos:
        log.error('-o/--output and --media required')
        return

    # heavy dependencies are imported lazily so other actions do not pay for them
    import cv2 as cv
    from tqdm import tqdm
    from app.processors import face_detector

    detector = face_detector.DetectorDLIBCNN()

    # get file list (sorted so that a slice always selects the same files)
    fp_videos = sorted(_glob_videos(dir_videos))
    if opt_slice:
        fp_videos = fp_videos[opt_slice[0]:opt_slice[1]]

    face_interval = 30  # minimum number of frames between two saved frames
    file_utils.mkdirs(dir_out)

    for fp_video in tqdm(fp_videos):
        # counters restart for each video so the number in the file name is
        # the frame's position inside the video named by the same file name
        frame_count = 0  # for naming
        frame_interval_count = 0  # for interval
        video_stem = Path(fp_video).stem
        video = cv.VideoCapture(fp_video)
        try:
            while video.isOpened():
                res, frame = video.read()
                if not res:
                    break
                frame_count += 1
                frame_interval_count += 1
                # check the cheap interval gate first; detection is only
                # needed for frames that are allowed to be saved
                if frame_interval_count < face_interval:
                    continue
                bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
                if len(bboxes) > 0:
                    # save frame
                    fn_frame = '{}_{}.jpg'.format(video_stem, file_utils.zpad(frame_count))
                    cv.imwrite(join(dir_out, fn_frame), frame)
                    frame_interval_count = 0
        finally:
            # always free the capture handle, even if detection or writing fails
            video.release()


def handle_download(fp_in, dir_out, opt_slice):
    """Download the videos listed in a CSV file with youtube_dl.

    The CSV is read with pandas and must provide the columns ``youtube_id``,
    ``vimeo_id`` and ``city``. For each row the YouTube id is preferred and the
    Vimeo id is used only when the YouTube id is empty. Rows without any id
    are skipped with a warning, and ids that already occur in the path of a
    video file in *dir_out* are not downloaded again.

    :param fp_in: path to the CSV file
    :param dir_out: directory the videos are saved to, named ``<id>.<ext>``
    :param opt_slice: ``(start, stop)`` pair selecting a row range of the CSV;
        ``None`` bounds leave that side open
    """
    if not dir_out:
        log.error('-o/--output required')
        return

    import youtube_dl

    df = pd.read_csv(fp_in)
    if opt_slice:
        df = df[opt_slice[0]:opt_slice[1]]
    # empty cells become '' so that missing ids are falsy
    df = df.fillna('')

    # videos already present in the output directory
    fp_videos = _glob_videos(dir_out)

    ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '') + '%(id)s.%(ext)s'})

    for _, row in df.iterrows():
        vid = str(row['youtube_id'])
        if vid:
            url = 'https://youtube.com/watch?v={}'.format(vid)
        else:
            vid = row['vimeo_id']
            if vid:
                # go through int() so an id parsed as a float loses its '.0'
                vid = str(int(vid))
                url = 'https://vimeo.com/{}'.format(vid)
        if not vid:
            log.warning('no video id: {} for {}'.format(vid, row['city']))
            continue

        # skip ids that already appear in an existing video path
        if any(vid in fp_video for fp_video in fp_videos):
            continue

        try:
            with ydl:
                ydl.download([url])
        except Exception as e:
            # best effort: report the failure and continue with the next row,
            # but let KeyboardInterrupt/SystemExit propagate
            log.error('could not dl: {} ({})'.format(vid, e))