summaryrefslogtreecommitdiff
path: root/Code/process_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'Code/process_data.py')
-rw-r--r--Code/process_data.py71
1 files changed, 71 insertions, 0 deletions
diff --git a/Code/process_data.py b/Code/process_data.py
new file mode 100644
index 0000000..170959a
--- /dev/null
+++ b/Code/process_data.py
@@ -0,0 +1,71 @@
+import numpy as np
+import getopt
+import sys
+from glob import glob
+
+import constants as c
+from utils import process_clip
+
+
+def process_training_data(num_clips):
+    """
+    Generates training clips and saves each one as a compressed .npz archive whose path
+    is c.TRAIN_DIR_CLIPS followed by the clip's index.
+
+    Indexing continues from the number of entries already matching c.TRAIN_DIR_CLIPS + '*',
+    so repeated runs add to the existing clips. A progress line is printed each time the
+    running total reaches a multiple of 100.
+
+    @param num_clips: The number of new clips to process. The command line entry point
+                      passes 5000000 unless -n/--num_clips is given.
+
+    @warning: This can take a couple of hours to complete with large numbers of clips.
+    """
+    # Entries already present decide where numbering resumes.
+    first_index = len(glob(c.TRAIN_DIR_CLIPS + '*'))
+    stop_index = first_index + num_clips
+
+    for index in xrange(first_index, stop_index):
+        processed_clip = process_clip()
+        save_path = c.TRAIN_DIR_CLIPS + str(index)
+        np.savez_compressed(save_path, processed_clip)
+
+        total_so_far = index + 1
+        if total_so_far % 100 == 0:
+            print('Processed %d clips' % total_so_far)
+
+
+def usage():
+    """
+    Prints the help text describing the supported command line options, one entry of
+    the table below per output line.
+    """
+    help_lines = (
+        'Options:',
+        '-n/--num_clips= <# clips to process for training>',
+        '-t/--train_dir= <Directory of full training frames>',
+        '-c/--clips_dir= <Save directory for processed clips>',
+        " (I suggest making this a hidden dir so the filesystem doesn't freeze",
+        " with so many files. DON'T `ls` THIS DIRECTORY!)",
+        '-o/--overwrite (Overwrites the previous data in the training dir)',
+    )
+
+    for help_line in help_lines:
+        print(help_line)
+
+
+def main():
+    """
+    Parses the command line options, applies them to the shared constants module, and
+    then processes the requested number of training clips.
+
+    Prints the usage text and exits with status 2 if the options cannot be parsed or if
+    the value given for -n/--num_clips is not an integer.
+    """
+    ##
+    # Handle command line input
+    ##
+
+    num_clips = 5000000
+    overwrite = False
+
+    try:
+        opts, _ = getopt.getopt(sys.argv[1:], 'n:t:c:o',
+                                ['num_clips=', 'train_dir=', 'clips_dir=', 'overwrite'])
+    except getopt.GetoptError:
+        usage()
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt in ('-n', '--num_clips'):
+            try:
+                num_clips = int(arg)
+            except ValueError:
+                # A non-numeric count is a usage error, not a crash with a traceback.
+                usage()
+                sys.exit(2)
+        elif opt in ('-t', '--train_dir'):
+            c.TRAIN_DIR = c.get_dir(arg)
+        elif opt in ('-c', '--clips_dir'):
+            c.TRAIN_DIR_CLIPS = c.get_dir(arg)
+        elif opt in ('-o', '--overwrite'):
+            # Only record the request here; see below for why clearing is deferred.
+            overwrite = True
+
+    # Clear the clips directory only after every option has been applied. Clearing while
+    # still parsing would wipe the default directory whenever -o appears before -c on
+    # the command line, and leave the directory the user actually chose untouched.
+    if overwrite:
+        c.clear_dir(c.TRAIN_DIR_CLIPS)
+
+    ##
+    # Process data for training
+    ##
+
+    process_training_data(num_clips)
+
+
+# Run the command line entry point only when this file is executed as a script.
+if __name__ == "__main__":
+    main()