1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
from glob import glob
import os
from os.path import join
from pathlib import Path
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils import logger_utils
import dlib
import pandas as pd
from PIL import Image, ImageOps, ImageFilter
from app.utils import file_utils, im_utils
log = logger_utils.Logger.getLogger()
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Input CSV file listing the videos (read by the download action)')
@click.option('-o', '--output', 'opt_fp_out',
              help='Output directory')
@click.option('--media', 'opt_dir_media',
              help='Directory of video files (required by the face_frames action)')
@click.option('--action', 'opt_action',
              type=click.Choice(['info', 'download', 'face_frames']),
              default='info',
              help='Command action')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
              help='Slice list of files')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_dir_media, opt_action, opt_slice):
    """YTMU utils

    Actions:

      info: default; performs no work.

      download: download the videos listed in the input CSV into --output.

      face_frames: scan the videos in --media and save frames that contain
      faces into --output.
    """
    # NOTE: 'info' and 'face_frames' are listed as choices because the default
    # value and the dispatch below use them; with only 'download' declared the
    # default was rejected by click and the face_frames branch was unreachable.

    # -------------------------------------------------
    # process

    if opt_action == 'download':
        # downloads video files with ytdl
        handle_download(opt_fp_in, opt_fp_out, opt_slice)
    elif opt_action == 'face_frames':
        # handle_face_frames takes exactly (fp_in, dir_out, dir_videos);
        # the slice option is not used by this action.
        handle_face_frames(opt_fp_in, opt_fp_out, opt_dir_media)
def handle_face_frames(fp_in, dir_out, dir_videos):
    """Save frames that contain at least one detected face.

    Every ``*.mp4``, ``*.webm`` and ``*.mkv`` file directly inside
    ``dir_videos`` is read frame by frame. A frame is written to ``dir_out``
    as ``<video stem>_<zero-padded frame count>.jpg`` when a face is detected
    in it and at least ``face_interval`` (30) frames have been read since the
    last saved frame.

    :param fp_in: unused by this function; kept so the signature is unchanged
    :param dir_out: directory the JPEG frames are written to (created if needed)
    :param dir_videos: directory containing the video files to scan
    :returns: None; logs an error and returns early if a directory is missing
    """
    if not dir_out or not dir_videos:
        # the CLI option that supplies dir_videos is --media
        log.error('-o/--output and --media required')
        return

    import cv2 as cv
    from tqdm import tqdm
    from app.processors import face_detector

    detector = face_detector.DetectorDLIBCNN()

    # get file list (same extension order as before: mp4, webm, mkv)
    fp_videos = []
    for ext in ('mp4', 'webm', 'mkv'):
        fp_videos += glob(join(dir_videos, '*.{}'.format(ext)))

    face_interval = 30
    # Both counters intentionally run across all videos, not per video:
    # frame_count feeds the output filename, frame_interval_count the spacing.
    frame_interval_count = 0
    frame_count = 0

    file_utils.mkdirs(dir_out)

    for fp_video in tqdm(fp_videos):
        video = cv.VideoCapture(fp_video)
        try:
            while video.isOpened():
                res, frame = video.read()
                if not res:
                    break

                frame_count += 1  # for naming
                frame_interval_count += 1  # for interval

                # Only run the detector once the interval has elapsed; its
                # result is ignored before that, so detecting on every frame
                # is wasted work.
                # NOTE(review): assumes detector.detect() keeps no state
                # between frames - confirm against DetectorDLIBCNN.
                if frame_interval_count < face_interval:
                    continue

                bboxes = detector.detect(frame, opt_size=(320, 240), opt_pyramids=0)
                if len(bboxes) > 0:
                    # save frame
                    fn_frame = '{}_{}.jpg'.format(Path(fp_video).stem,
                                                  file_utils.zpad(frame_count))
                    cv.imwrite(join(dir_out, fn_frame), frame)
                    frame_interval_count = 0
        finally:
            # release the capture handle even if reading or detection raises
            video.release()
def handle_download(fp_in, dir_out, opt_slice):
    """Download the videos listed in a CSV file with youtube-dl.

    The CSV is expected to provide ``youtube_id``, ``vimeo_id`` and ``city``
    columns. For each row the YouTube ID is used when present, otherwise the
    Vimeo ID; rows with neither are logged and skipped. Files are written to
    ``dir_out`` as ``<id>.<ext>``, and an ID that already has a ``.mp4``,
    ``.webm`` or ``.mkv`` file there is not downloaded again.

    :param fp_in: path to the CSV file
    :param dir_out: directory the videos are downloaded into
    :param opt_slice: ``(start, stop)`` pair used to slice the CSV rows;
        ``(None, None)`` selects every row
    :returns: None; logs an error and returns early if ``dir_out`` is missing
    """
    if not dir_out:
        # without this guard join(None, ...) below raises a TypeError
        log.error('-o/--output required')
        return

    import youtube_dl

    df = pd.read_csv(fp_in)
    if opt_slice:
        df = df[opt_slice[0]:opt_slice[1]]
    df = df.fillna('')

    # IDs that already have a video file. The output template below names
    # files "<id>.<ext>", so the file stem is the video ID. Matching on the
    # stem (instead of a substring of the whole path) avoids false hits on
    # directory names and makes each lookup O(1).
    downloaded_ids = set()
    for ext in ('mp4', 'webm', 'mkv'):
        for fp_video in glob(join(dir_out, '*.{}'.format(ext))):
            downloaded_ids.add(Path(fp_video).stem)

    ydl = youtube_dl.YoutubeDL({'outtmpl': join(dir_out, '') + '%(id)s.%(ext)s'})

    for _, row in df.iterrows():
        vid = str(row['youtube_id'])
        if vid:
            url = 'https://youtube.com/watch?v={}'.format(vid)
        else:
            # fall back to Vimeo; int() drops a float suffix such as "123.0"
            vid = row['vimeo_id']
            if vid:
                vid = str(int(vid))
                url = 'https://vimeo.com/{}'.format(vid)

        if not vid:
            log.warning('no video id: {} for {}'.format(vid, row['city']))
            continue

        if vid in downloaded_ids:
            continue

        try:
            with ydl:
                ydl.download([url])
        except Exception as e:
            # best effort: keep going with the remaining rows, but let
            # KeyboardInterrupt / SystemExit stop the batch
            log.error('could not dl: {} ({})'.format(vid, e))
        else:
            # avoid a second attempt if the same ID appears again in the CSV
            downloaded_ids.add(vid)
|