megapixels/commands/datasets/50people.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

from glob import glob
import os
from os.path import join
from pathlib import Path

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils import logger_utils

import dlib
import pandas as pd
from PIL import Image, ImageOps, ImageFilter
from app.utils import file_utils, im_utils


# Module-level logger used by the command and its handler functions below.
log = logger_utils.Logger.getLogger()

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Input CSV file listing the video IDs')
@click.option('-o', '--output', 'opt_fp_out',
  help='Output directory')
@click.option('--media', 'opt_dir_media',
  help='Directory containing the downloaded video files')
@click.option('--action', 'opt_action', 
  type=click.Choice(['info', 'download', 'face_frames']),
  default='info',
  help='Command action')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
  help='Slice list of files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_media, opt_action, opt_slice):
  """Video dataset utils: download videos or extract frames with faces

  Actions:
    info         default; performs no work
    download     download the videos listed in the input CSV to --output
    face_frames  save frames containing faces from the videos in --media
                 to --output
  """

  # -------------------------------------------------
  # process  
 
  if opt_action == 'download':
    # downloads video files with ytdl
    handle_download(opt_fp_in, opt_fp_out, opt_slice)
  elif opt_action == 'face_frames':
    # handle_face_frames takes no slice argument, so --slice only applies
    # to the download action
    handle_face_frames(opt_fp_in, opt_fp_out, opt_dir_media)


def handle_face_frames(fp_in, dir_out, dir_videos):
  """Saves frames that contain at least one detected face as JPEGs

  Scans every .mp4/.webm/.mkv file in dir_videos and writes matching frames
  to dir_out as '<video stem>_<zero padded frame number>.jpg'. After a frame
  is saved, at least `face_interval` further frames are read before another
  one can be saved.

  :param fp_in: unused; kept so all action handlers share the same leading args
  :param dir_out: directory the frames are written to (created if missing)
  :param dir_videos: directory containing the video files
  """
  if not dir_out or not dir_videos:
    # the CLI option that supplies dir_videos is --media
    log.error('-o/--output and --media required')
    return
  
  import cv2 as cv
  from tqdm import tqdm
  from app.processors import face_detector
  detector = face_detector.DetectorDLIBCNN()

  # get file list
  fp_videos = []
  for ext in ('mp4', 'webm', 'mkv'):
    fp_videos += glob(join(dir_videos, '*.{}'.format(ext)))

  face_interval = 30  # minimum number of frames between two saved frames
  # NOTE(review): both counters run across all videos (they are not reset per
  # file), so the number in the filename is a running total - confirm intended
  frame_interval_count = 0
  frame_count = 0

  file_utils.mkdirs(dir_out)

  for fp_video in tqdm(fp_videos):
    # log.debug('opening: {}'.format(fp_video))
    video = cv.VideoCapture(fp_video)
    try:
      while video.isOpened():
        res, frame = video.read()
        if not res:
          break

        frame_count += 1  # for naming
        frame_interval_count += 1  # for interval
        if frame_interval_count < face_interval:
          # frame could not be saved anyway, so skip the costly detection
          continue

        bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
        if len(bboxes) > 0:
          # save frame
          fn_frame = '{}_{}.jpg'.format(Path(fp_video).stem, file_utils.zpad(frame_count))
          cv.imwrite(join(dir_out, fn_frame), frame)
          frame_interval_count = 0
    finally:
      # always free the decoder/file handle, even if detection or writing fails
      video.release()


def handle_download(fp_in, dir_out, opt_slice):
  """Downloads the videos listed in a CSV file using youtube-dl

  The CSV is expected to provide 'youtube_id', 'vimeo_id' and 'city' columns.
  For each row the YouTube ID is preferred and the Vimeo ID is the fallback.
  Videos that already exist in dir_out as '<id>.mp4|webm|mkv' are skipped.

  :param fp_in: path to the CSV file
  :param dir_out: directory the videos are downloaded to
  :param opt_slice: (start, end) row slice applied to the CSV; None entries
    leave that side of the slice open
  """
  if not dir_out:
    # -o/--output is optional on the CLI; joining None below would raise
    log.error('-o/--output required')
    return

  import youtube_dl
  df = pd.read_csv(fp_in)
  if opt_slice:
    df = df[opt_slice[0]:opt_slice[1]]
  df = df.fillna('')

  # IDs already on disk; files are written as '<id>.<ext>' (see outtmpl), so
  # the stem is the ID. Exact matching avoids false positives from IDs that
  # are substrings of other IDs or of the directory path.
  downloaded = set()
  for ext in ('mp4', 'webm', 'mkv'):
    downloaded.update(Path(fp).stem for fp in glob(join(dir_out, '*.{}'.format(ext))))
  
  ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '') + '%(id)s.%(ext)s'})

  for _, row in df.iterrows():
    vid = str(row['youtube_id']).strip()
    if vid:
      url = 'https://youtube.com/watch?v={}'.format(vid)
    else:
      vimeo_id = row['vimeo_id']
      if not vimeo_id:
        log.warning('no video id: {} for {}'.format(vid, row['city']))
        continue
      # pandas may have parsed the numeric ID as a float (e.g. 12345.0)
      vid = str(int(vimeo_id))
      url = 'https://vimeo.com/{}'.format(vid)

    if vid in downloaded:
      #log.debug('skip: {}'.format(vid))
      continue

    try:
      with ydl:
        ydl.download([url])
    except Exception as e:
      # keep going with the remaining rows, but let Ctrl-C / SystemExit
      # propagate instead of being swallowed
      log.error('could not dl: {} ({})'.format(vid, e))
    else:
      downloaded.add(vid)