# path: root/Code/process_data.py
# blob: 170959aef8d99124721658494d1eb04f953293df (plain)
import numpy as np
import getopt
import sys
from glob import glob

import constants as c
from utils import process_clip


def process_training_data(num_clips):
    """
    Processes training clips with process_clip() and saves each one, compressed, under
    c.TRAIN_DIR_CLIPS.

    Clips are numbered starting from the count of paths already matching
    c.TRAIN_DIR_CLIPS + '*', so a new run continues after the existing files rather than
    restarting at 0. Each clip is written with np.savez_compressed to the path
    c.TRAIN_DIR_CLIPS + str(clip_num) (numpy appends the '.npz' extension).

    @param num_clips: The number of clips to process. Default = 5000000 (set in main()).

    @warning: This can take a couple of hours to complete with large numbers of clips.
    """
    # NOTE(review): the path concatenation assumes c.TRAIN_DIR_CLIPS ends with a path
    # separator -- confirm against constants.get_dir.
    num_prev_clips = len(glob(c.TRAIN_DIR_CLIPS + '*'))

    # A plain counter loop (instead of xrange) and single-argument print(...) behave the same
    # on Python 2 and also run on Python 3, without building a multi-million element list.
    clip_num = num_prev_clips
    end_clip_num = num_prev_clips + num_clips
    while clip_num < end_clip_num:
        clip = process_clip()

        np.savez_compressed(c.TRAIN_DIR_CLIPS + str(clip_num), clip)

        clip_num += 1
        # Progress report every 100 clips; the count includes previously saved clips.
        if clip_num % 100 == 0:
            print('Processed %d clips' % clip_num)


def usage():
    """
    Prints the command line options accepted by this script to stdout.
    """
    help_lines = (
        'Options:',
        '-n/--num_clips= <# clips to process for training>',
        '-t/--train_dir= <Directory of full training frames>',
        '-c/--clips_dir= <Save directory for processed clips>',
        "                (I suggest making this a hidden dir so the filesystem doesn't freeze",
        "                 with so many files. DON'T `ls` THIS DIRECTORY!)",
        '-o/--overwrite  (Overwrites the previous data in the training dir)',
    )

    # A single-argument print(...) produces the same output on Python 2 and Python 3.
    print('\n'.join(help_lines))


def main():
    """
    Parses the command line options, applies them to the constants module, then processes the
    requested number of training clips.

    Recognized options (see usage()): -n/--num_clips, -t/--train_dir, -c/--clips_dir and
    -o/--overwrite. If the options cannot be parsed, or the -n value is not an integer, the
    usage text is printed and the script exits with status 2.
    """
    ##
    # Handle command line input
    ##

    num_clips = 5000000
    overwrite = False

    try:
        opts, _ = getopt.getopt(sys.argv[1:], 'n:t:c:o',
                                ['num_clips=', 'train_dir=', 'clips_dir=', 'overwrite'])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-n', '--num_clips'):
            try:
                num_clips = int(arg)
            except ValueError:
                # Treat a non-numeric clip count like any other malformed option.
                usage()
                sys.exit(2)
        elif opt in ('-t', '--train_dir'):
            c.TRAIN_DIR = c.get_dir(arg)
        elif opt in ('-c', '--clips_dir'):
            c.TRAIN_DIR_CLIPS = c.get_dir(arg)
        elif opt in ('-o', '--overwrite'):
            # Only record the request here. Clearing immediately would wipe the default
            # clips directory whenever -o appears before -c on the command line.
            overwrite = True

    if overwrite:
        # All options are parsed, so this is the clips directory the run will actually use.
        c.clear_dir(c.TRAIN_DIR_CLIPS)

    ##
    # Process data for training
    ##

    process_training_data(num_clips)


# Run the command line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()