author    Matt Cooper <matthew_cooper@brown.edu>  2016-08-12 16:48:46 -0400
committer Matt Cooper <matthew_cooper@brown.edu>  2016-08-12 16:48:46 -0400
commit    0a3fd5b62065333669c7b391c626cb2505217617 (patch)
tree      04be2e559272d62e22c08258d0c72d759a00265d /Code
First commit
Diffstat (limited to 'Code')
-rw-r--r--  Code/avg_runner.py          173
-rw-r--r--  Code/constants.py           198
-rw-r--r--  Code/d_model.py             187
-rw-r--r--  Code/d_scale_model.py       153
-rw-r--r--  Code/g_model.py             428
-rw-r--r--  Code/loss_functions.py      118
-rw-r--r--  Code/loss_functions_test.py 304
-rw-r--r--  Code/process_data.py         71
-rw-r--r--  Code/tfutils.py             133
-rw-r--r--  Code/tfutils_test.py        102
-rw-r--r--  Code/utils.py               212
11 files changed, 2079 insertions, 0 deletions
diff --git a/Code/avg_runner.py b/Code/avg_runner.py
new file mode 100644
index 0000000..5de994b
--- /dev/null
+++ b/Code/avg_runner.py
@@ -0,0 +1,173 @@
+import tensorflow as tf
+import getopt
+import sys
+
+from utils import get_train_batch, get_test_batch
+import constants as c
+from g_model import GeneratorModel
+from d_model import DiscriminatorModel
+
+
+class AVGRunner:
+ def __init__(self, model_load_path, num_test_rec):
+ """
+ Initializes the Adversarial Video Generation Runner.
+
+ @param model_load_path: The path from which to load a previously-saved model.
+ Default = None.
+ @param num_test_rec: The number of recursive generations to produce when testing. Recursive
+ generations use previous generations as input to predict further into
+ the future.
+ """
+
+ self.global_step = 0
+ self.num_test_rec = num_test_rec
+
+ self.sess = tf.Session()
+ self.summary_writer = tf.train.SummaryWriter(c.SUMMARY_SAVE_DIR, graph=self.sess.graph)
+
+ if c.ADVERSARIAL:
+ print 'Init discriminator...'
+ self.d_model = DiscriminatorModel(self.sess,
+ self.summary_writer,
+ c.TRAIN_HEIGHT,
+ c.TRAIN_WIDTH,
+ c.SCALE_CONV_FMS_D,
+ c.SCALE_KERNEL_SIZES_D,
+ c.SCALE_FC_LAYER_SIZES_D)
+
+ print 'Init generator...'
+ self.g_model = GeneratorModel(self.sess,
+ self.summary_writer,
+ c.TRAIN_HEIGHT,
+ c.TRAIN_WIDTH,
+ c.TEST_HEIGHT,
+ c.TEST_WIDTH,
+ c.SCALE_FMS_G,
+ c.SCALE_KERNEL_SIZES_G)
+
+ print 'Init variables...'
+ self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)
+ self.sess.run(tf.initialize_all_variables())
+
+ # if load path specified, load a saved model
+ if model_load_path is not None:
+ self.saver.restore(self.sess, model_load_path)
+ print 'Model restored from ' + model_load_path
+
+ def train(self):
+ """
+ Runs a training loop on the model networks.
+ """
+ while True:
+ if c.ADVERSARIAL:
+ # update discriminator
+ batch = get_train_batch()
+ print 'Training discriminator...'
+ self.d_model.train_step(batch, self.g_model)
+
+ # update generator
+ batch = get_train_batch()
+ print 'Training generator...'
+ self.global_step = self.g_model.train_step(
+ batch, discriminator=(self.d_model if c.ADVERSARIAL else None))
+
+ # save the models
+ if self.global_step % c.MODEL_SAVE_FREQ == 0:
+ print '-' * 30
+ print 'Saving models...'
+ self.saver.save(self.sess,
+ c.MODEL_SAVE_DIR + 'model.ckpt',
+ global_step=self.global_step)
+ print 'Saved models!'
+ print '-' * 30
+
+ # test generator model
+ if self.global_step % c.TEST_FREQ == 0:
+ self.test()
+
+ def test(self):
+ """
+ Runs one test step on the generator network.
+ """
+ batch = get_test_batch(c.BATCH_SIZE, num_rec_out=self.num_test_rec)
+ self.g_model.test_batch(
+ batch, self.global_step, num_rec_out=self.num_test_rec)
+
+
+def usage():
+ print 'Options:'
+ print '-l/--load_path= <Relative/path/to/saved/model>'
+ print '-t/--test_dir= <Directory of test images>'
+ print '-r/--recursions= <# recursive predictions to make on test>'
+ print '-a/--adversarial= <{t/f}> (Whether to use adversarial training. Default=True)'
+ print '-n/--name= <Subdirectory of ../Data/Save/*/ in which to save output of this run>'
+ print '-O/--overwrite (Overwrites all previous data for the model with this save name)'
+ print '-T/--test_only (Only runs a test step -- no training)'
+ print '-H/--help (prints usage)'
+ print '--stats_freq= <how often to print loss/train error stats, in # steps>'
+ print '--summary_freq= <how often to save loss/error summaries, in # steps>'
+ print '--img_save_freq= <how often to save generated images, in # steps>'
+ print '--test_freq= <how often to test the model on test data, in # steps>'
+ print '--model_save_freq= <how often to save the model, in # steps>'
+
+
+def main():
+ ##
+ # Handle command line input.
+ ##
+
+ load_path = None
+ test_only = False
+ num_test_rec = 1 # number of recursive predictions to make on test
+ try:
+ opts, _ = getopt.getopt(sys.argv[1:], 'l:t:r:a:n:OTH',
+ ['load_path=', 'test_dir=', 'recursions=', 'adversarial=', 'name=',
+ 'overwrite', 'test_only', 'help', 'stats_freq=', 'summary_freq=',
+ 'img_save_freq=', 'test_freq=', 'model_save_freq='])
+ except getopt.GetoptError:
+ usage()
+ sys.exit(2)
+
+ for opt, arg in opts:
+ if opt in ('-l', '--load_path'):
+ load_path = arg
+ if opt in ('-t', '--test_dir'):
+ c.set_test_dir(arg)
+ if opt in ('-r', '--recursions'):
+ num_test_rec = int(arg)
+ if opt in ('-a', '--adversarial'):
+ c.ADVERSARIAL = (arg.lower() == 'true' or arg.lower() == 't')
+ if opt in ('-n', '--name'):
+ c.set_save_name(arg)
+ if opt in ('-O', '--overwrite'):
+ c.clear_save_name()
+ if opt in ('-H', '--help'):
+ usage()
+ sys.exit(2)
+ if opt in ('-T', '--test_only'):
+ test_only = True
+ if opt == '--stats_freq':
+ c.STATS_FREQ = int(arg)
+ if opt == '--summary_freq':
+ c.SUMMARY_FREQ = int(arg)
+ if opt == '--img_save_freq':
+ c.IMG_SAVE_FREQ = int(arg)
+ if opt == '--test_freq':
+ c.TEST_FREQ = int(arg)
+ if opt == '--model_save_freq':
+ c.MODEL_SAVE_FREQ = int(arg)
+
+ ##
+ # Init and run the predictor
+ ##
+
+ runner = AVGRunner(load_path, num_test_rec)
+ if test_only:
+ runner.test()
+ else:
+ runner.train()
+
+
+if __name__ == '__main__':
+ main()
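+# A hypothetical invocation (illustrative values only; see usage() above for all flags):
+#   python avg_runner.py -n my_run -r 2 --test_freq=2000
+# This trains adversarially, saves output under the 'my_run' subdirectories, and tests
+# every 2000 steps with 2 recursive predictions per test batch.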
diff --git a/Code/constants.py b/Code/constants.py
new file mode 100644
index 0000000..afe8f9d
--- /dev/null
+++ b/Code/constants.py
@@ -0,0 +1,198 @@
+import numpy as np
+import os
+from glob import glob
+import shutil
+from datetime import datetime
+from scipy.ndimage import imread
+
+##
+# Data
+##
+
+def get_date_str():
+ """
+ @return: A string representing the current date/time that can be used as a directory name.
+ """
+ return str(datetime.now()).replace(' ', '_').replace(':', '.')[:-10]
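+ # e.g. '2016-08-12 16:48:46.123456' becomes '2016-08-12_16.48'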
+
+def get_dir(directory):
+ """
+ Creates the given directory if it does not exist.
+
+ @param directory: The path to the directory.
+ @return: The path to the directory.
+ """
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+ return directory
+
+def clear_dir(directory):
+ """
+ Removes all files in the given directory.
+
+ @param directory: The path to the directory.
+ """
+ for f in os.listdir(directory):
+ path = os.path.join(directory, f)
+ try:
+ if os.path.isfile(path):
+ os.unlink(path)
+ elif os.path.isdir(path):
+ shutil.rmtree(path)
+ except Exception as e:
+ print(e)
+
+def get_test_frame_dims():
+ img_path = glob(TEST_DIR + '*/*')[0]
+ img = imread(img_path, mode='RGB')
+ shape = np.shape(img)
+
+ return shape[0], shape[1]
+
+def set_test_dir(directory):
+ """
+ Edits all constants dependent on TEST_DIR.
+
+ @param directory: The new test directory.
+ """
+ global TEST_DIR, TEST_HEIGHT, TEST_WIDTH
+
+ TEST_DIR = directory
+ TEST_HEIGHT, TEST_WIDTH = get_test_frame_dims()
+
+# root directory for all data
+DATA_DIR = get_dir('../Data/')
+# directory of unprocessed training frames
+TRAIN_DIR = DATA_DIR + 'Ms_Pacman/Train/'
+# directory of unprocessed test frames
+TEST_DIR = DATA_DIR + 'Ms_Pacman/Test/'
+# Directory of processed training clips.
+# hidden so Finder doesn't freeze with so many files. DON'T USE `ls` ON THIS DIR!
+TRAIN_DIR_CLIPS = get_dir(DATA_DIR + '.Clips/')
+
+# For processing clips: the l2 difference between consecutive frames must exceed this value
+MOVEMENT_THRESHOLD = 100
+# total number of processed clips in TRAIN_DIR_CLIPS
+NUM_CLIPS = len(glob(TRAIN_DIR_CLIPS + '*'))
+
+# the height and width of the full frames to test on
+TEST_HEIGHT, TEST_WIDTH = get_test_frame_dims()
+# the height and width of the patches to train on
+TRAIN_HEIGHT = TRAIN_WIDTH = 32
+
+##
+# Output
+##
+
+def set_save_name(name):
+ """
+ Edits all constants dependent on SAVE_NAME.
+
+ @param name: The new save name.
+ """
+ global SAVE_NAME, MODEL_SAVE_DIR, SUMMARY_SAVE_DIR, IMG_SAVE_DIR
+
+ SAVE_NAME = name
+ MODEL_SAVE_DIR = get_dir(SAVE_DIR + 'Models/' + SAVE_NAME)
+ SUMMARY_SAVE_DIR = get_dir(SAVE_DIR + 'Summaries/' + SAVE_NAME)
+ IMG_SAVE_DIR = get_dir(SAVE_DIR + 'Images/' + SAVE_NAME)
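+ # e.g. set_save_name('run_1/') points MODEL_SAVE_DIR at ../Save/Models/run_1/ (and
+ # likewise for summaries and images); 'run_1/' is just an illustrative name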
+
+def clear_save_name():
+ """
+ Clears all saved content for SAVE_NAME.
+ """
+ clear_dir(MODEL_SAVE_DIR)
+ clear_dir(SUMMARY_SAVE_DIR)
+ clear_dir(IMG_SAVE_DIR)
+
+
+# root directory for all saved content
+SAVE_DIR = get_dir('../Save/')
+
+# inner directory to differentiate between runs
+SAVE_NAME = 'Default/'
+# directory for saved models
+MODEL_SAVE_DIR = get_dir(SAVE_DIR + 'Models/' + SAVE_NAME)
+# directory for saved TensorBoard summaries
+SUMMARY_SAVE_DIR = get_dir(SAVE_DIR + 'Summaries/' + SAVE_NAME)
+# directory for saved images
+IMG_SAVE_DIR = get_dir(SAVE_DIR + 'Images/' + SAVE_NAME)
+
+
+STATS_FREQ = 10 # how often to print loss/train error stats, in # steps
+SUMMARY_FREQ = 100 # how often to save the summaries, in # steps
+IMG_SAVE_FREQ = 1000 # how often to save generated images, in # steps
+TEST_FREQ = 5000 # how often to test the model on test data, in # steps
+MODEL_SAVE_FREQ = 10000 # how often to save the model, in # steps
+
+##
+# General training
+##
+
+# whether to use adversarial training vs. basic training of the generator
+ADVERSARIAL = True
+# the training minibatch size
+BATCH_SIZE = 8
+# the number of history frames to give as input to the network
+HIST_LEN = 4
+
+##
+# Loss parameters
+##
+
+# the p to use for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+L_NUM = 2
+# the power to which each gradient term is raised in GDL loss
+ALPHA_NUM = 1
+# the weight of the adversarial loss in the combined loss
+LAM_ADV = 0.05
+# the weight of the lp loss in the combined loss
+LAM_LP = 1
+# the weight of the GDL loss in the combined loss
+LAM_GDL = 1
+
+##
+# Generator model
+##
+
+# learning rate for the generator model
+LRATE_G = 0.00004 # Value in paper is 0.04
+# padding for convolutions in the generator model
+PADDING_G = 'SAME'
+# feature maps for each convolution of each scale network in the generator model
+# e.g. SCALE_FMS_G[1][2] is the number of input feature maps to the 3rd convolution in the
+# 2nd scale network.
+SCALE_FMS_G = [[3 * HIST_LEN, 128, 256, 128, 3],
+ [3 * (HIST_LEN + 1), 128, 256, 128, 3],
+ [3 * (HIST_LEN + 1), 128, 256, 512, 256, 128, 3],
+ [3 * (HIST_LEN + 1), 128, 256, 512, 256, 128, 3]]
+# kernel sizes for each convolution of each scale network in the generator model
+SCALE_KERNEL_SIZES_G = [[3, 3, 3, 3],
+ [5, 3, 3, 5],
+ [5, 3, 3, 3, 3, 5],
+ [7, 5, 5, 5, 5, 7]]
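+# note: each kernel list has one fewer entry than its feature map list, since the fms
+# specify the input and output depth of every convolution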
+
+
+##
+# Discriminator model
+##
+
+# learning rate for the discriminator model
+LRATE_D = 0.02
+# padding for convolutions in the discriminator model
+PADDING_D = 'VALID'
+# feature maps for each convolution of each scale network in the discriminator model
+SCALE_CONV_FMS_D = [[3, 64],
+ [3, 64, 128, 128],
+ [3, 128, 256, 256],
+ [3, 128, 256, 512, 128]]
+# kernel sizes for each convolution of each scale network in the discriminator model
+SCALE_KERNEL_SIZES_D = [[3],
+ [3, 3, 3],
+ [5, 5, 5],
+ [7, 7, 5, 5]]
+# layer sizes for each fully-connected layer of each scale network in the discriminator model
+# layer connecting conv to fully-connected is dynamically generated when creating the model
+SCALE_FC_LAYER_SIZES_D = [[512, 256, 1],
+ [1024, 512, 1],
+ [1024, 512, 1],
+ [1024, 512, 1]]
diff --git a/Code/d_model.py b/Code/d_model.py
new file mode 100644
index 0000000..7b1cb12
--- /dev/null
+++ b/Code/d_model.py
@@ -0,0 +1,187 @@
+import tensorflow as tf
+import numpy as np
+from skimage.transform import resize
+
+from d_scale_model import DScaleModel
+from loss_functions import adv_loss
+import constants as c
+
+
+# noinspection PyShadowingNames
+class DiscriminatorModel:
+ def __init__(self, session, summary_writer, height, width, scale_conv_layer_fms,
+ scale_kernel_sizes, scale_fc_layer_sizes):
+ """
+ Initializes a DiscriminatorModel.
+
+ @param session: The TensorFlow session.
+ @param summary_writer: The writer object to record TensorBoard summaries
+ @param height: The height of the input images.
+ @param width: The width of the input images.
+ @param scale_conv_layer_fms: The number of feature maps in each convolutional layer of each
+ scale network.
+ @param scale_kernel_sizes: The size of the kernel for each layer of each scale network.
+ @param scale_fc_layer_sizes: The number of nodes in each fully-connected layer of each scale
+ network.
+
+ @type session: tf.Session
+ @type summary_writer: tf.train.SummaryWriter
+ @type height: int
+ @type width: int
+ @type scale_conv_layer_fms: list<list<int>>
+ @type scale_kernel_sizes: list<list<int>>
+ @type scale_fc_layer_sizes: list<list<int>>
+ """
+ self.sess = session
+ self.summary_writer = summary_writer
+ self.height = height
+ self.width = width
+ self.scale_conv_layer_fms = scale_conv_layer_fms
+ self.scale_kernel_sizes = scale_kernel_sizes
+ self.scale_fc_layer_sizes = scale_fc_layer_sizes
+ self.num_scale_nets = len(scale_conv_layer_fms)
+
+ self.define_graph()
+
+ # noinspection PyAttributeOutsideInit
+ def define_graph(self):
+ """
+ Sets up the model graph in TensorFlow.
+ """
+ with tf.name_scope('discriminator'):
+ ##
+ # Setup scale networks. Each will make the predictions for images at a given scale.
+ ##
+
+ self.scale_nets = []
+ for scale_num in xrange(self.num_scale_nets):
+ with tf.name_scope('scale_net_' + str(scale_num)):
+ scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
+ self.scale_nets.append(DScaleModel(scale_num,
+ int(self.height * scale_factor),
+ int(self.width * scale_factor),
+ self.scale_conv_layer_fms[scale_num],
+ self.scale_kernel_sizes[scale_num],
+ self.scale_fc_layer_sizes[scale_num]))
+
+ # A list of the prediction tensors for each scale network
+ self.scale_preds = []
+ for scale_num in xrange(self.num_scale_nets):
+ self.scale_preds.append(self.scale_nets[scale_num].preds)
+
+ ##
+ # Data
+ ##
+
+ self.labels = tf.placeholder(tf.float32, shape=[None, 1], name='labels')
+
+ ##
+ # Training
+ ##
+
+ with tf.name_scope('training'):
+ # global loss is the combined loss from every scale network
+ self.global_loss = adv_loss(self.scale_preds, self.labels)
+ self.global_step = tf.Variable(0, trainable=False, name='global_step')
+ self.optimizer = tf.train.GradientDescentOptimizer(c.LRATE_D, name='optimizer')
+ self.train_op = self.optimizer.minimize(self.global_loss,
+ global_step=self.global_step,
+ name='train_op')
+
+ # add summaries to visualize in TensorBoard
+ loss_summary = tf.scalar_summary('loss_D', self.global_loss)
+ self.summaries = tf.merge_summary([loss_summary])
+
+ def build_feed_dict(self, input_frames, gt_output_frames, generator):
+ """
+ Builds a feed_dict with resized inputs and outputs for each scale network.
+
+ @param input_frames: An array of shape
+ [batch_size x self.height x self.width x (3 * HIST_LEN)], The frames to
+ use for generation.
+ @param gt_output_frames: An array of shape [batch_size x self.height x self.width x 3], The
+ ground truth outputs for each sequence in input_frames.
+ @param generator: The generator model.
+
+ @return: The feed_dict needed to run this network, all scale_nets, and the generator
+ predictions.
+ """
+ feed_dict = {}
+ batch_size = np.shape(gt_output_frames)[0]
+
+ ##
+ # Get generated frames from GeneratorModel
+ ##
+
+ g_feed_dict = {generator.input_frames_train: input_frames,
+ generator.gt_frames_train: gt_output_frames}
+ g_scale_preds = self.sess.run(generator.scale_preds_train, feed_dict=g_feed_dict)
+
+ ##
+ # Create discriminator feed dict
+ ##
+ for scale_num in xrange(self.num_scale_nets):
+ scale_net = self.scale_nets[scale_num]
+
+ # resize gt_output_frames
+ scaled_gt_output_frames = np.empty([batch_size, scale_net.height, scale_net.width, 3])
+ for i, img in enumerate(gt_output_frames):
+ # for skimage.transform.resize, images need to be in range [0, 1], so normalize to
+ # [0, 1] before resize and back to [-1, 1] after
+ sknorm_img = (img / 2) + 0.5
+ resized_frame = resize(sknorm_img, [scale_net.height, scale_net.width, 3])
+ scaled_gt_output_frames[i] = (resized_frame - 0.5) * 2
+
+ # combine generated frames with the resized gt_output_frames to form the inputs
+ scaled_input_frames = np.concatenate([g_scale_preds[scale_num],
+ scaled_gt_output_frames])
+
+ # add to feed_dict
+ feed_dict[scale_net.input_frames] = scaled_input_frames
+
+ # add labels for each image to feed_dict
+ batch_size = np.shape(input_frames)[0]
+ feed_dict[self.labels] = np.concatenate([np.zeros([batch_size, 1]),
+ np.ones([batch_size, 1])])
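+ # the label order matches scaled_input_frames above: generated frames get 0,
+ # ground truth frames get 1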
+
+ return feed_dict
+
+ def train_step(self, batch, generator):
+ """
+ Runs a training step using the global loss on each of the scale networks.
+
+ @param batch: An array of shape
+ [BATCH_SIZE x self.height x self.width x (3 * (HIST_LEN + 1))]. The input
+ and output frames, concatenated along the channel axis (index 3).
+ @param generator: The generator model.
+
+ @return: The global step.
+ """
+ ##
+ # Split into inputs and outputs
+ ##
+
+ input_frames = batch[:, :, :, :-3]
+ gt_output_frames = batch[:, :, :, -3:]
+
+ ##
+ # Train
+ ##
+
+ feed_dict = self.build_feed_dict(input_frames, gt_output_frames, generator)
+
+ _, global_loss, global_step, summaries = self.sess.run(
+ [self.train_op, self.global_loss, self.global_step, self.summaries],
+ feed_dict=feed_dict)
+
+ ##
+ # User output
+ ##
+
+ if global_step % c.STATS_FREQ == 0:
+ print 'DiscriminatorModel: step %d | global loss: %f' % (global_step, global_loss)
+ if global_step % c.SUMMARY_FREQ == 0:
+ print 'DiscriminatorModel: saved summaries'
+ self.summary_writer.add_summary(summaries, global_step)
+
+ return global_step
diff --git a/Code/d_scale_model.py b/Code/d_scale_model.py
new file mode 100644
index 0000000..766e01a
--- /dev/null
+++ b/Code/d_scale_model.py
@@ -0,0 +1,153 @@
+import tensorflow as tf
+from tfutils import w, b, conv_out_size
+import constants as c
+
+
+# noinspection PyShadowingNames
+class DScaleModel:
+ """
+ A DScaleModel is a network that takes a single video frame as input and attempts to
+ discriminate whether that frame is a real-world image or one generated by a generator
+ network. Multiple of these are used together in a DiscriminatorModel to make predictions
+ on frames at increasing scales.
+ """
+
+ def __init__(self, scale_index, height, width, conv_layer_fms, kernel_sizes, fc_layer_sizes):
+ """
+ Initializes the DScaleModel.
+
+ @param scale_index: The index of this scale network in the DiscriminatorModel.
+ @param height: The height of the input images.
+ @param width: The width of the input images.
+ @param conv_layer_fms: The number of output feature maps for each convolution.
+ @param kernel_sizes: The size of the kernel for each convolutional layer.
+ @param fc_layer_sizes: The number of nodes in each fully-connected layer.
+
+ @type scale_index: int
+ @type height: int
+ @type width: int
+ @type conv_layer_fms: list<int>
+ @type kernel_sizes: list<int> (len = len(conv_layer_fms) - 1)
+ @type fc_layer_sizes: list<int>
+ """
+ assert len(kernel_sizes) == len(conv_layer_fms) - 1, \
+ 'len(kernel_sizes) must = len(conv_layer_fms) - 1'
+
+ self.scale_index = scale_index
+ self.height = height
+ self.width = width
+ self.conv_layer_fms = conv_layer_fms
+ self.kernel_sizes = kernel_sizes
+ self.fc_layer_sizes = fc_layer_sizes
+
+ self.define_graph()
+
+ # noinspection PyAttributeOutsideInit
+ def define_graph(self):
+ """
+ Sets up the model graph in TensorFlow.
+ """
+
+ ##
+ # Input data
+ ##
+ with tf.name_scope('input'):
+ self.input_frames = tf.placeholder(
+ tf.float32, shape=[None, self.height, self.width, self.conv_layer_fms[0]])
+
+ # use variable batch_size for more flexibility
+ self.batch_size = tf.shape(self.input_frames)[0]
+
+ ##
+ # Layer setup
+ ##
+
+ with tf.name_scope('setup'):
+ # convolution
+ with tf.name_scope('convolutions'):
+ conv_ws = []
+ conv_bs = []
+ last_out_height = self.height
+ last_out_width = self.width
+ for i in xrange(len(self.kernel_sizes)):
+ conv_ws.append(w([self.kernel_sizes[i],
+ self.kernel_sizes[i],
+ self.conv_layer_fms[i],
+ self.conv_layer_fms[i + 1]]))
+ conv_bs.append(b([self.conv_layer_fms[i + 1]]))
+
+ last_out_height = conv_out_size(
+ last_out_height, c.PADDING_D, self.kernel_sizes[i], 1)
+ last_out_width = conv_out_size(
+ last_out_width, c.PADDING_D, self.kernel_sizes[i], 1)
+
+ # fully-connected
+ with tf.name_scope('fully-connected'):
+ # Add in an initial layer to go from the last conv to the first fully-connected.
+ # Use /2 for the height and width because there is a 2x2 pooling layer
+ self.fc_layer_sizes.insert(
+ 0, (last_out_height / 2) * (last_out_width / 2) * self.conv_layer_fms[-1])
+
+ fc_ws = []
+ fc_bs = []
+ for i in xrange(len(self.fc_layer_sizes) - 1):
+ fc_ws.append(w([self.fc_layer_sizes[i],
+ self.fc_layer_sizes[i + 1]]))
+ fc_bs.append(b([self.fc_layer_sizes[i + 1]]))
+
+ ##
+ # Forward pass calculation
+ ##
+
+ def generate_predictions():
+ """
+ Runs self.input_frames through the network to generate a prediction from 0
+ (generated img) to 1 (real img).
+
+ @return: A tensor of predictions of shape [self.batch_size x 1].
+ """
+ with tf.name_scope('calculation'):
+ preds = tf.zeros([self.batch_size, 1])
+ last_input = self.input_frames
+
+ # convolutions
+ with tf.name_scope('convolutions'):
+ for i in xrange(len(conv_ws)):
+ # Convolve layer and activate with ReLU
+ preds = tf.nn.conv2d(
+ last_input, conv_ws[i], [1, 1, 1, 1], padding=c.PADDING_D)
+ preds = tf.nn.relu(preds + conv_bs[i])
+
+ last_input = preds
+
+ # pooling layer
+ with tf.name_scope('pooling'):
+ preds = tf.nn.max_pool(preds, [1, 2, 2, 1], [1, 2, 2, 1], padding=c.PADDING_D)
+
+ # flatten preds for dense layers
+ shape = preds.get_shape().as_list()
+ # -1 can be used as one dimension to size dynamically
+ preds = tf.reshape(preds, [-1, shape[1] * shape[2] * shape[3]])
+
+ # fully-connected layers
+ with tf.name_scope('fully-connected'):
+ for i in xrange(len(fc_ws)):
+ preds = tf.matmul(preds, fc_ws[i]) + fc_bs[i]
+
+ # Activate with ReLU (or Sigmoid for last layer)
+ if i == len(fc_ws) - 1:
+ preds = tf.sigmoid(preds)
+ else:
+ preds = tf.nn.relu(preds)
+
+ # clip preds to the range [0.1, 0.9] for stability
+ with tf.name_scope('clip'):
+ preds = tf.clip_by_value(preds, 0.1, 0.9)
+
+ return preds
+
+ self.preds = generate_predictions()
+
+ ##
+ # Training handled by DiscriminatorModel
+ ##
diff --git a/Code/g_model.py b/Code/g_model.py
new file mode 100644
index 0000000..eef24ab
--- /dev/null
+++ b/Code/g_model.py
@@ -0,0 +1,428 @@
+import tensorflow as tf
+import numpy as np
+from scipy.misc import imsave
+from skimage.transform import resize
+from copy import deepcopy
+
+import constants as c
+from loss_functions import combined_loss
+from utils import psnr_error, sharp_diff_error
+from tfutils import w, b
+
+# noinspection PyShadowingNames
+class GeneratorModel:
+ def __init__(self, session, summary_writer, height_train, width_train, height_test,
+ width_test, scale_layer_fms, scale_kernel_sizes):
+ """
+ Initializes a GeneratorModel.
+
+ @param session: The TensorFlow Session.
+ @param summary_writer: The writer object to record TensorBoard summaries
+ @param height_train: The height of the input images for training.
+ @param width_train: The width of the input images for training.
+ @param height_test: The height of the input images for testing.
+ @param width_test: The width of the input images for testing.
+ @param scale_layer_fms: The number of feature maps in each layer of each scale network.
+ @param scale_kernel_sizes: The size of the kernel for each layer of each scale network.
+
+ @type session: tf.Session
+ @type summary_writer: tf.train.SummaryWriter
+ @type height_train: int
+ @type width_train: int
+ @type height_test: int
+ @type width_test: int
+ @type scale_layer_fms: list<list<int>>
+ @type scale_kernel_sizes: list<list<int>>
+ """
+ self.sess = session
+ self.summary_writer = summary_writer
+ self.height_train = height_train
+ self.width_train = width_train
+ self.height_test = height_test
+ self.width_test = width_test
+ self.scale_layer_fms = scale_layer_fms
+ self.scale_kernel_sizes = scale_kernel_sizes
+ self.num_scale_nets = len(scale_layer_fms)
+
+ self.define_graph()
+
+ # noinspection PyAttributeOutsideInit
+ def define_graph(self):
+ """
+ Sets up the model graph in TensorFlow.
+ """
+ with tf.name_scope('generator'):
+ ##
+ # Data
+ ##
+
+ with tf.name_scope('data'):
+ self.input_frames_train = tf.placeholder(
+ tf.float32, shape=[None, self.height_train, self.width_train, 3 * c.HIST_LEN])
+ self.gt_frames_train = tf.placeholder(
+ tf.float32, shape=[None, self.height_train, self.width_train, 3])
+
+ self.input_frames_test = tf.placeholder(
+ tf.float32, shape=[None, self.height_test, self.width_test, 3 * c.HIST_LEN])
+ self.gt_frames_test = tf.placeholder(
+ tf.float32, shape=[None, self.height_test, self.width_test, 3])
+
+ # use variable batch_size for more flexibility
+ self.batch_size_train = tf.shape(self.input_frames_train)[0]
+ self.batch_size_test = tf.shape(self.input_frames_test)[0]
+
+ ##
+ # Scale network setup and calculation
+ ##
+
+ self.summaries_train = []
+ self.scale_preds_train = [] # the generated images at each scale
+ self.scale_gts_train = [] # the ground truth images at each scale
+ self.d_scale_preds = [] # the predictions from the discriminator model
+
+ self.summaries_test = []
+ self.scale_preds_test = [] # the generated images at each scale
+ self.scale_gts_test = [] # the ground truth images at each scale
+
+ for scale_num in xrange(self.num_scale_nets):
+ with tf.name_scope('scale_' + str(scale_num)):
+ with tf.name_scope('setup'):
+ ws = []
+ bs = []
+
+ # create weights for kernels
+ for i in xrange(len(self.scale_kernel_sizes[scale_num])):
+ ws.append(w([self.scale_kernel_sizes[scale_num][i],
+ self.scale_kernel_sizes[scale_num][i],
+ self.scale_layer_fms[scale_num][i],
+ self.scale_layer_fms[scale_num][i + 1]]))
+ bs.append(b([self.scale_layer_fms[scale_num][i + 1]]))
+
+ with tf.name_scope('calculation'):
+ def calculate(height, width, inputs, gts, last_gen_frames):
+ # scale inputs and gts
+ scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
+ scale_height = int(height * scale_factor)
+ scale_width = int(width * scale_factor)
+
+ inputs = tf.image.resize_images(inputs, scale_height, scale_width)
+ scale_gts = tf.image.resize_images(gts, scale_height, scale_width)
+
+ # for all scales but the first, add the frame generated by the last
+ # scale to the input
+ if scale_num > 0:
+ last_gen_frames = tf.image.resize_images(last_gen_frames,
+ scale_height,
+ scale_width)
+ inputs = tf.concat(3, [inputs, last_gen_frames])
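+ # the channel depth becomes 3 * (HIST_LEN + 1), matching the first entry
+ # of SCALE_FMS_G for every scale after the first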
+
+ # generated frame predictions
+ preds = inputs
+
+ # perform convolutions
+ with tf.name_scope('convolutions'):
+ for i in xrange(len(self.scale_kernel_sizes[scale_num])):
+ # Convolve layer
+ preds = tf.nn.conv2d(
+ preds, ws[i], [1, 1, 1, 1], padding=c.PADDING_G)
+
+ # Activate with ReLU (or Tanh for last layer)
+ if i == len(self.scale_kernel_sizes[scale_num]) - 1:
+ preds = tf.nn.tanh(preds + bs[i])
+ else:
+ preds = tf.nn.relu(preds + bs[i])
+
+ return preds, scale_gts
+
+ ##
+ # Perform train calculation
+ ##
+
+ # for all scales but the first, add the frame generated by the last
+ # scale to the input
+ if scale_num > 0:
+ last_scale_pred_train = self.scale_preds_train[scale_num - 1]
+ else:
+ last_scale_pred_train = None
+
+ # calculate
+ train_preds, train_gts = calculate(self.height_train,
+ self.width_train,
+ self.input_frames_train,
+ self.gt_frames_train,
+ last_scale_pred_train)
+ self.scale_preds_train.append(train_preds)
+ self.scale_gts_train.append(train_gts)
+
+ # We need to run the network first to get generated frames, run the
+ # discriminator on those frames to get d_scale_preds, then run this
+ # again for the loss optimization.
+ if c.ADVERSARIAL:
+ self.d_scale_preds.append(tf.placeholder(tf.float32, [None, 1]))
+
+ ##
+ # Perform test calculation
+ ##
+
+ # for all scales but the first, add the frame generated by the last
+ # scale to the input
+ if scale_num > 0:
+ last_scale_pred_test = self.scale_preds_test[scale_num - 1]
+ else:
+ last_scale_pred_test = None
+
+ # calculate
+ test_preds, test_gts = calculate(self.height_test,
+ self.width_test,
+ self.input_frames_test,
+ self.gt_frames_test,
+ last_scale_pred_test)
+ self.scale_preds_test.append(test_preds)
+ self.scale_gts_test.append(test_gts)
+
+ ##
+ # Training
+ ##
+
+ with tf.name_scope('train'):
+ # global loss is the combined loss from every scale network
+ self.global_loss = combined_loss(self.scale_preds_train,
+ self.scale_gts_train,
+ self.d_scale_preds)
+ self.global_step = tf.Variable(0, trainable=False)
+ self.optimizer = tf.train.AdamOptimizer(learning_rate=c.LRATE_G, name='optimizer')
+ self.train_op = self.optimizer.minimize(self.global_loss,
+ global_step=self.global_step,
+ name='train_op')
+
+ # train loss summary
+ loss_summary = tf.scalar_summary('train_loss_G', self.global_loss)
+ self.summaries_train.append(loss_summary)
+
+ ##
+ # Error
+ ##
+
+ with tf.name_scope('error'):
+ # error computation
+ # get error at largest scale
+ self.psnr_error_train = psnr_error(self.scale_preds_train[-1],
+ self.gt_frames_train)
+ self.sharpdiff_error_train = sharp_diff_error(self.scale_preds_train[-1],
+ self.gt_frames_train)
+ self.psnr_error_test = psnr_error(self.scale_preds_test[-1],
+ self.gt_frames_test)
+ self.sharpdiff_error_test = sharp_diff_error(self.scale_preds_test[-1],
+ self.gt_frames_test)
+ # train error summaries
+ summary_psnr_train = tf.scalar_summary('train_PSNR',
+ self.psnr_error_train)
+ summary_sharpdiff_train = tf.scalar_summary('train_SharpDiff',
+ self.sharpdiff_error_train)
+ self.summaries_train += [summary_psnr_train, summary_sharpdiff_train]
+
+ # test error
+ summary_psnr_test = tf.scalar_summary('test_PSNR',
+ self.psnr_error_test)
+ summary_sharpdiff_test = tf.scalar_summary('test_SharpDiff',
+ self.sharpdiff_error_test)
+ self.summaries_test += [summary_psnr_test, summary_sharpdiff_test]
+
+ # add summaries to visualize in TensorBoard
+ self.summaries_train = tf.merge_summary(self.summaries_train)
+ self.summaries_test = tf.merge_summary(self.summaries_test)
+
+ def train_step(self, batch, discriminator=None):
+ """
+ Runs a training step using the global loss on each of the scale networks.
+
+ @param batch: An array of shape
+ [c.BATCH_SIZE x self.height_train x self.width_train x (3 * (c.HIST_LEN + 1))].
+ The input and output frames, concatenated along the channel axis (index 3).
+ @param discriminator: The discriminator model. Default = None, if not adversarial.
+
+ @return: The global step.
+ """
+ ##
+ # Split into inputs and outputs
+ ##
+
+ input_frames = batch[:, :, :, :-3]
+ gt_frames = batch[:, :, :, -3:]
+
+ ##
+ # Train
+ ##
+
+ feed_dict = {self.input_frames_train: input_frames, self.gt_frames_train: gt_frames}
+
+ if c.ADVERSARIAL:
+ # Run the generator first to get generated frames
+ scale_preds = self.sess.run(self.scale_preds_train, feed_dict=feed_dict)
+
+ # Run the discriminator nets on those frames to get predictions
+ d_feed_dict = {}
+ for scale_num, gen_frames in enumerate(scale_preds):
+ d_feed_dict[discriminator.scale_nets[scale_num].input_frames] = gen_frames
+ d_scale_preds = self.sess.run(discriminator.scale_preds, feed_dict=d_feed_dict)
+
+ # Add the discriminator predictions on the generated frames to the feed_dict
+ for i, preds in enumerate(d_scale_preds):
+ feed_dict[self.d_scale_preds[i]] = preds
+
+ _, global_loss, global_psnr_error, global_sharpdiff_error, global_step, summaries = \
+ self.sess.run([self.train_op,
+ self.global_loss,
+ self.psnr_error_train,
+ self.sharpdiff_error_train,
+ self.global_step,
+ self.summaries_train],
+ feed_dict=feed_dict)
+
+ ##
+ # User output
+ ##
+ if global_step % c.STATS_FREQ == 0:
+ print 'GeneratorModel : Step ', global_step
+ print ' Global Loss : ', global_loss
+ print ' PSNR Error : ', global_psnr_error
+ print ' Sharpdiff Error: ', global_sharpdiff_error
+ if global_step % c.SUMMARY_FREQ == 0:
+ self.summary_writer.add_summary(summaries, global_step)
+ print 'GeneratorModel: saved summaries'
+ if global_step % c.IMG_SAVE_FREQ == 0:
+ print '-' * 30
+ print 'Saving images...'
+
+ # if not adversarial, the scale net preds were never computed for the
+ # discriminator above, so compute them now
+ if not c.ADVERSARIAL:
+ scale_preds = self.sess.run(self.scale_preds_train, feed_dict=feed_dict)
+
+ # re-generate scale gt_frames to avoid having to run through TensorFlow.
+ scale_gts = []
+ for scale_num in xrange(self.num_scale_nets):
+ scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
+ scale_height = int(self.height_train * scale_factor)
+ scale_width = int(self.width_train * scale_factor)
+
+ # resize gt_output_frames for scale and append to scale_gts_train
+ scaled_gt_frames = np.empty([c.BATCH_SIZE, scale_height, scale_width, 3])
+ for i, img in enumerate(gt_frames):
+ # for skimage.transform.resize, images need to be in range [0, 1], so normalize
+ # to [0, 1] before resize and back to [-1, 1] after
+ sknorm_img = (img / 2) + 0.5
+ resized_frame = resize(sknorm_img, [scale_height, scale_width, 3])
+ scaled_gt_frames[i] = (resized_frame - 0.5) * 2
+ scale_gts.append(scaled_gt_frames)
+
+ # for every clip in the batch, save the inputs, scale preds and scale gts
+ for pred_num in xrange(len(input_frames)):
+ pred_dir = c.get_dir(c.IMG_SAVE_DIR + 'Step_' + str(global_step) + '/' + str(
+ pred_num) + '/')
+
+ # save input images
+ for frame_num in xrange(c.HIST_LEN):
+ img = input_frames[pred_num, :, :, (frame_num * 3):((frame_num + 1) * 3)]
+ imsave(pred_dir + 'input_' + str(frame_num) + '.png', img)
+
+ # save preds and gts at each scale
+ # noinspection PyUnboundLocalVariable
+ for scale_num, scale_pred in enumerate(scale_preds):
+ gen_img = scale_pred[pred_num]
+
+ path = pred_dir + 'scale' + str(scale_num)
+ gt_img = scale_gts[scale_num][pred_num]
+
+ imsave(path + '_gen.png', gen_img)
+ imsave(path + '_gt.png', gt_img)
+
+ print 'Saved images!'
+ print '-' * 30
+
+ return global_step
+
+ def test_batch(self, batch, global_step, num_rec_out=1, save_imgs=True):
+ """
+ Runs a test step on the generator network, optionally making recursive predictions.
+
+ @param batch: An array of shape
+ [batch_size x self.height_test x self.width_test x (3 * (c.HIST_LEN + num_rec_out))].
+ A batch of the input and output frames, concatenated along the channel axis
+ (index 3).
+ @param global_step: The global step.
+ @param num_rec_out: The number of outputs to predict. Outputs > 1 are computed recursively,
+ using previously-generated frames as input. Default = 1.
+ @param save_imgs: Whether or not to save the input/output images to file. Default = True.
+
+ @return: A tuple of (psnr error, sharpdiff error) for the batch.
+ """
+ if num_rec_out < 1:
+ raise ValueError('num_rec_out must be >= 1')
+
+ print '-' * 30
+ print 'Testing:'
+
+ ##
+ # Split into inputs and outputs
+ ##
+
+ input_frames = batch[:, :, :, :3 * c.HIST_LEN]
+ gt_frames = batch[:, :, :, 3 * c.HIST_LEN:]
+
+ ##
+ # Generate num_rec_out recursive predictions
+ ##
+
+ working_input_frames = deepcopy(input_frames) # input frames that will shift w/ recursion
+ rec_preds = []
+ rec_summaries = []
+ for rec_num in xrange(num_rec_out):
+ working_gt_frames = gt_frames[:, :, :, 3 * rec_num:3 * (rec_num + 1)]
+
+ feed_dict = {self.input_frames_test: working_input_frames,
+ self.gt_frames_test: working_gt_frames}
+ preds, psnr, sharpdiff, summaries = self.sess.run([self.scale_preds_test[-1],
+ self.psnr_error_test,
+ self.sharpdiff_error_test,
+ self.summaries_test],
+ feed_dict=feed_dict)
+
+ # remove first input and add new pred as last input
+ working_input_frames = np.concatenate(
+ [working_input_frames[:, :, :, 3:], preds], axis=3)
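+ # working_input_frames keeps shape [batch, height, width, 3 * HIST_LEN]: the
+ # oldest frame's 3 channels are dropped and the new prediction's 3 are appended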
+
+ # add predictions and summaries
+ rec_preds.append(preds)
+ rec_summaries.append(summaries)
+
+ print 'Recursion ', rec_num
+ print 'PSNR Error : ', psnr
+ print 'Sharpdiff Error: ', sharpdiff
+
+ # write summaries
+ # TODO: Think of a good way to write rec output summaries - for now, just using the
+ # first output.
+ self.summary_writer.add_summary(rec_summaries[0], global_step)
+
+ ##
+ # Save images
+ ##
+
+ if save_imgs:
+ for pred_num in xrange(len(input_frames)):
+ pred_dir = c.get_dir(
+ c.IMG_SAVE_DIR + 'Tests/Step_' + str(global_step) + '/' + str(pred_num) + '/')
+
+ # save input images
+ for frame_num in xrange(c.HIST_LEN):
+ img = input_frames[pred_num, :, :, (frame_num * 3):((frame_num + 1) * 3)]
+ imsave(pred_dir + 'input_' + str(frame_num) + '.png', img)
+
+ # save recursive outputs
+ for rec_num in xrange(num_rec_out):
+ gen_img = rec_preds[rec_num][pred_num]
+ gt_img = gt_frames[pred_num, :, :, 3 * rec_num:3 * (rec_num + 1)]
+ imsave(pred_dir + 'gen_' + str(rec_num) + '.png', gen_img)
+ imsave(pred_dir + 'gt_' + str(rec_num) + '.png', gt_img)
+
+ print '-' * 30
diff --git a/Code/loss_functions.py b/Code/loss_functions.py
new file mode 100644
index 0000000..994d226
--- /dev/null
+++ b/Code/loss_functions.py
@@ -0,0 +1,118 @@
+import tensorflow as tf
+import numpy as np
+
+from tfutils import log10
+import constants as c
+
+def combined_loss(gen_frames, gt_frames, d_preds, lam_adv=1, lam_lp=1, lam_gdl=1, l_num=2, alpha=2):
+ """
+ Calculates the sum of the combined adversarial, lp and GDL losses in the given proportion. Used
+ for training the generative model.
+
+ @param gen_frames: A list of tensors of the generated frames at each scale.
+ @param gt_frames: A list of tensors of the ground truth frames at each scale.
+ @param d_preds: A list of tensors of the classifications made by the discriminator model at each
+ scale.
+ @param lam_adv: The weight of the adversarial loss in the combined loss.
+ @param lam_lp: The weight of the lp loss in the combined loss.
+ @param lam_gdl: The weight of the GDL loss in the combined loss.
+ @param l_num: 1 or 2 for l1 and l2 loss, respectively.
+ @param alpha: The power to which each gradient term is raised in GDL loss.
+
+ @return: The combined adversarial, lp and GDL losses.
+ """
+ batch_size = tf.shape(gen_frames[0])[0] # variable batch size as a tensor
+
+ loss = lam_lp * lp_loss(gen_frames, gt_frames, l_num)
+ loss += lam_gdl * gdl_loss(gen_frames, gt_frames, alpha)
+ if c.ADVERSARIAL: loss += lam_adv * adv_loss(d_preds, tf.ones([batch_size, 1]))
+
+ return loss
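+# (i.e. loss = lam_lp * Lp + lam_gdl * Lgdl, plus lam_adv * Ladv against all-ones labels
+# when adversarial training is on, pushing the generator toward frames the discriminator
+# scores as real)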
+
+
+def bce_loss(preds, targets):
+ """
+ Calculates the sum of binary cross-entropy losses between predictions and ground truths.
+
+ @param preds: An Nx1 tensor. The predicted classifications of each frame.
+ @param targets: An Nx1 tensor. The target labels for each frame (either 0 or 1). Not
+ "truths" because the generator passes in lies to determine how well it confuses the
+ discriminator.
+
+ @return: The sum of binary cross-entropy losses.
+ """
+ return tf.squeeze(-1 * (tf.matmul(targets, log10(preds), transpose_a=True) +
+ tf.matmul(1 - targets, log10(1 - preds), transpose_a=True)))
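+# (the transposed matmuls collapse the Nx1 columns to scalars, so bce_loss computes
+# -sum_i [t_i * log10(p_i) + (1 - t_i) * log10(1 - p_i)] over the batch; using base-10
+# logs just scales the usual BCE by a constant factor)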
+
+
+def lp_loss(gen_frames, gt_frames, l_num):
+ """
+ Calculates the sum of lp losses between the predicted and ground truth frames.
+
+ @param gen_frames: The predicted frames at each scale.
+ @param gt_frames: The ground truth frames at each scale.
+ @param l_num: 1 or 2 for l1 and l2 loss, respectively.
+
+ @return: The lp loss.
+ """
+ # calculate the loss for each scale
+ scale_losses = []
+ for i in xrange(len(gen_frames)):
+ scale_losses.append(tf.reduce_sum(tf.abs(gen_frames[i] - gt_frames[i])**l_num))
+
+ # condense into one tensor and avg
+ return tf.reduce_mean(tf.pack(scale_losses))
+
+
+def gdl_loss(gen_frames, gt_frames, alpha):
+ """
+ Calculates the sum of GDL losses between the predicted and ground truth frames.
+
+ @param gen_frames: The predicted frames at each scale.
+ @param gt_frames: The ground truth frames at each scale.
+ @param alpha: The power to which each gradient term is raised.
+
+ @return: The GDL loss.
+ """
+ # calculate the loss for each scale
+ scale_losses = []
+ for i in xrange(len(gen_frames)):
+ # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
+ pos = tf.constant(np.identity(3), dtype=tf.float32)
+ neg = -1 * pos
+ filter_x = tf.expand_dims(tf.pack([neg, pos]), 0) # [-1, 1]
+ filter_y = tf.pack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)]) # [[1],[-1]]
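+ # pos/neg are 3x3 identities, so each filter diffs a channel only with itself:
+ # filter_x has shape [1, 2, 3, 3] and filter_y has shape [2, 1, 3, 3]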
+ strides = [1, 1, 1, 1] # stride of (1, 1)
+ padding = 'SAME'
+
+ gen_dx = tf.abs(tf.nn.conv2d(gen_frames[i], filter_x, strides, padding=padding))
+ gen_dy = tf.abs(tf.nn.conv2d(gen_frames[i], filter_y, strides, padding=padding))
+ gt_dx = tf.abs(tf.nn.conv2d(gt_frames[i], filter_x, strides, padding=padding))
+ gt_dy = tf.abs(tf.nn.conv2d(gt_frames[i], filter_y, strides, padding=padding))
+
+ grad_diff_x = tf.abs(gt_dx - gen_dx)
+ grad_diff_y = tf.abs(gt_dy - gen_dy)
+
+ scale_losses.append(tf.reduce_sum((grad_diff_x ** alpha + grad_diff_y ** alpha)))
+
+ # condense into one tensor and avg
+ return tf.reduce_mean(tf.pack(scale_losses))
+
+
+def adv_loss(preds, labels):
+ """
+ Calculates the sum of BCE losses between the predicted classifications and true labels.
+
+ @param preds: The predicted classifications at each scale.
+ @param labels: The true labels. (Same for every scale).
+
+ @return: The adversarial loss.
+ """
+ # calculate the loss for each scale
+ scale_losses = []
+ for i in xrange(len(preds)):
+ loss = bce_loss(preds[i], labels)
+ scale_losses.append(loss)
+
+ # condense into one tensor and avg
+ return tf.reduce_mean(tf.pack(scale_losses))
diff --git a/Code/loss_functions_test.py b/Code/loss_functions_test.py
new file mode 100644
index 0000000..6b015f2
--- /dev/null
+++ b/Code/loss_functions_test.py
@@ -0,0 +1,304 @@
+from loss_functions import *
+
+sess = tf.Session()
+BATCH_SIZE = 2
+NUM_SCALES = 5
+MAX_P = 5
+MAX_ALPHA = 1
+
+
+# noinspection PyClassHasNoInit
+class TestBCELoss:
+ def test_false_correct(self):
+ targets = tf.constant(np.zeros([5, 1]))
+ preds = 1e-7 * tf.constant(np.ones([5, 1]))
+ res = sess.run(bce_loss(preds, targets))
+
+ log_con = np.log10(1 - 1e-7)
+ res_tru = -1 * np.sum(np.array([log_con] * 5))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_false_incorrect(self):
+ targets = tf.constant(np.zeros([5, 1]))
+ preds = tf.constant(np.ones([5, 1])) - 1e-7
+ res = sess.run(bce_loss(preds, targets))
+
+ log_con = np.log10(1e-7)
+ res_tru = -1 * np.sum(np.array([log_con] * 5))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_false_half(self):
+ targets = tf.constant(np.zeros([5, 1]))
+ preds = 0.5 * tf.constant(np.ones([5, 1]))
+ res = sess.run(bce_loss(preds, targets))
+
+ log_con = np.log10(0.5)
+ res_tru = -1 * np.sum(np.array([log_con] * 5))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_true_correct(self):
+ targets = tf.constant(np.ones([5, 1]))
+ preds = tf.constant(np.ones([5, 1])) - 1e-7
+ res = sess.run(bce_loss(preds, targets))
+
+ log = np.log10(1 - 1e-7)
+ res_tru = -1 * np.sum(np.array([log] * 5))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_true_incorrect(self):
+ targets = tf.constant(np.ones([5, 1]))
+ preds = 1e-7 * tf.constant(np.ones([5, 1]))
+ res = sess.run(bce_loss(preds, targets))
+
+ log = np.log10(1e-7)
+ res_tru = -1 * np.sum(np.array([log] * 5))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_true_half(self):
+ targets = tf.constant(np.ones([5, 1]))
+ preds = 0.5 * tf.constant(np.ones([5, 1]))
+ res = sess.run(bce_loss(preds, targets))
+
+ log = np.log10(0.5)
+ res_tru = -1 * np.sum(np.array([log] * 5))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+
+# noinspection PyClassHasNoInit
+class TestLPLoss:
+ def test_same_images(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ scale_preds.append(tf.constant(np.ones([BATCH_SIZE, 2**i, 2**i, 3])))
+ scale_truths.append(tf.constant(np.ones([BATCH_SIZE, 2**i, 2**i, 3])))
+
+ for p in xrange(1, MAX_P + 1):
+ res = sess.run(lp_loss(scale_preds, scale_truths, p))
+ assert res == res_tru, 'failed on p = %d' % p
+
+ def test_opposite_images(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ scale_preds.append(tf.constant(np.zeros([BATCH_SIZE, 2**i, 2 ** i, 3])))
+ scale_truths.append(tf.constant(np.ones([BATCH_SIZE, 2**i, 2 ** i, 3])))
+
+ res_tru += BATCH_SIZE * 2**i * 2**i * 3
+
+ for p in xrange(1, MAX_P + 1):
+ res = sess.run(lp_loss(scale_preds, scale_truths, p))
+ assert res == res_tru, 'failed on p = %d' % p
+
+ def test_some_correct(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ # generate batch of 3-deep identity matrices
+ preds = np.empty([BATCH_SIZE, 2**i, 2**i, 3])
+ imat = np.identity(2**i)
+ for elt in xrange(BATCH_SIZE):
+ preds[elt] = np.dstack([imat, imat, imat])
+
+ scale_preds.append(tf.constant(preds))
+ scale_truths.append(tf.constant(np.zeros([BATCH_SIZE, 2**i, 2**i, 3])))
+
+ res_tru += BATCH_SIZE * 2**i * 3
+
+ for p in xrange(1, MAX_P + 1):
+ res = sess.run(lp_loss(scale_preds, scale_truths, p))
+ assert res == res_tru, 'failed on p = %d' % p
+
+ def test_l_high(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ # opposite images
+ preds = np.empty([BATCH_SIZE, 2**i, 2**i, 3])
+ preds.fill(3)
+ scale_preds.append(tf.constant(preds))
+ scale_truths.append(tf.constant(np.zeros([BATCH_SIZE, 2**i, 2**i, 3])))
+
+ res_tru += BATCH_SIZE * 2**i * 2**i * 3
+
+ for p in xrange(1, MAX_P + 1):
+ res = sess.run(lp_loss(scale_preds, scale_truths, p))
+ assert res == res_tru * (3**p), 'failed on p = %d' % p
+
+
+# noinspection PyClassHasNoInit
+class TestGDLLoss:
+ def test_same_uniform(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ scale_preds.append(tf.ones([BATCH_SIZE, 2 ** i, 2 ** i, 3]))
+ scale_truths.append(tf.ones([BATCH_SIZE, 2 ** i, 2 ** i, 3]))
+
+ for a in xrange(1, MAX_ALPHA + 1):
+ res = sess.run(gdl_loss(scale_preds, scale_truths, a))
+ assert res == res_tru, 'failed on alpha = %d' % a
+
+ def test_same_nonuniform(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ # generate batch of 3-deep identity matrices
+ arr = np.empty([BATCH_SIZE, 2 ** i, 2 ** i, 3])
+ imat = np.identity(2 ** i)
+ for elt in xrange(BATCH_SIZE):
+ arr[elt] = np.dstack([imat, imat, imat])
+
+ scale_preds.append(tf.constant(arr, dtype=tf.float32))
+ scale_truths.append(tf.constant(arr, dtype=tf.float32))
+
+ for a in xrange(1, MAX_ALPHA + 1):
+ res = sess.run(gdl_loss(scale_preds, scale_truths, a))
+ assert res == res_tru, 'failed on alpha = %d' % a
+
+ # TODO: Not 0 loss as expected because the 1s array is padded by 0s, so there is some gradient.
+ def test_diff_uniform(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_tru = 0
+ for i in xrange(1, NUM_SCALES + 1):
+ scale_preds.append(tf.zeros([BATCH_SIZE, 2 ** i, 2 ** i, 3]))
+ scale_truths.append(tf.ones([BATCH_SIZE, 2 ** i, 2 ** i, 3]))
+
+ # every diff should have an abs value of 1, so no need for alpha handling
+ res_tru += BATCH_SIZE * 2 ** i * 2 * 3
+
+ for a in xrange(1, MAX_ALPHA + 1):
+ res = sess.run(gdl_loss(scale_preds, scale_truths, a))
+ assert res == res_tru, 'failed on alpha = %d' % a
+
+ def test_diff_one_uniform_one_not(self):
+ # generate scales
+ scale_preds = []
+ scale_truths = []
+
+ res_trus = np.zeros(MAX_ALPHA - 1)
+ for i in xrange(1, NUM_SCALES + 1):
+ # generate batch of 3-deep matrices with 3s on the diagonals
+ preds = np.empty([BATCH_SIZE, 2 ** i, 2 ** i, 3])
+ imat = np.identity(2 ** i) * 3
+ for elt in xrange(BATCH_SIZE):
+ preds[elt] = np.dstack([imat, imat, imat])
+
+ scale_preds.append(tf.constant(preds, dtype=tf.float32))
+ scale_truths.append(tf.zeros([BATCH_SIZE, 2 ** i, 2 ** i, 3]))
+
+ # every diff has an abs value of 3, so we can multiply that, raised to alpha
+ # for each alpha check, times the number of diffs in a batch:
+ # BATCH_SIZE * (diffs to left + down) * (diffs from up and right) * (# 3s in height) *
+ # (# channels)
+ num_diffs = BATCH_SIZE * 2 * 2 * 2**i * 3
+
+ for a in xrange(1, MAX_ALPHA):
+ res_trus[a] += num_diffs * 3**a
+
+ for a, res_tru in enumerate(res_trus):
+ res = sess.run(gdl_loss(scale_preds, scale_truths, a + 1))
+ assert res == res_tru, 'failed on alpha = %d' % (a + 1)
+
+
+# noinspection PyClassHasNoInit
+class TestAdvLoss:
+ def test_false_correct(self):
+ # generate scales
+ scale_preds = []
+ targets = tf.constant(np.zeros([5, 1]))
+
+ res_tru = 0
+ log_con = np.log10(1 - 1e-7)
+ for i in xrange(NUM_SCALES):
+ scale_preds.append(1e-7 * tf.constant(np.ones([5, 1])))
+ res_tru += -1 * np.sum(np.array([log_con] * 5))
+
+ res = sess.run(adv_loss(scale_preds, targets))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_false_incorrect(self):
+ scale_preds = []
+ targets = tf.constant(np.zeros([5, 1]))
+
+ res_tru = 0
+ log_con = np.log10(1e-7)
+ for i in xrange(NUM_SCALES):
+ scale_preds.append(tf.constant(np.ones([5, 1])) - 1e-7)
+ res_tru += -1 * np.sum(np.array([log_con] * 5))
+
+ res = sess.run(adv_loss(scale_preds, targets))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_false_half(self):
+ scale_preds = []
+ targets = tf.constant(np.zeros([5, 1]))
+
+ res_tru = 0
+ log_con = np.log10(0.5)
+ for i in xrange(NUM_SCALES):
+ scale_preds.append(0.5 * tf.constant(np.ones([5, 1])))
+ res_tru += -1 * np.sum(np.array([log_con] * 5))
+
+ res = sess.run(adv_loss(scale_preds, targets))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_true_correct(self):
+ scale_preds = []
+ targets = tf.constant(np.ones([5, 1]))
+
+ res_tru = 0
+ log = np.log10(1 - 1e-7)
+ for i in xrange(NUM_SCALES):
+ scale_preds.append(tf.constant(np.ones([5, 1])) - 1e-7)
+ res_tru += -1 * np.sum(np.array([log] * 5))
+
+ res = sess.run(adv_loss(scale_preds, targets))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_true_incorrect(self):
+ scale_preds = []
+ targets = tf.constant(np.ones([5, 1]))
+
+ res_tru = 0
+ log = np.log10(1e-7)
+ for i in xrange(NUM_SCALES):
+ scale_preds.append(1e-7 * tf.constant(np.ones([5, 1])))
+ res_tru += -1 * np.sum(np.array([log] * 5))
+
+ res = sess.run(adv_loss(scale_preds, targets))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
+
+ def test_true_half(self):
+ scale_preds = []
+ targets = tf.constant(np.ones([5, 1]))
+
+ res_tru = 0
+ log = np.log10(0.5)
+ for i in xrange(NUM_SCALES):
+ scale_preds.append(0.5 * tf.constant(np.ones([5, 1])))
+ res_tru += -1 * np.sum(np.array([log] * 5))
+
+ res = sess.run(adv_loss(scale_preds, targets))
+ assert np.array_equal(np.around(res, 7), np.around(res_tru, 7))
diff --git a/Code/process_data.py b/Code/process_data.py
new file mode 100644
index 0000000..170959a
--- /dev/null
+++ b/Code/process_data.py
@@ -0,0 +1,71 @@
+import numpy as np
+import getopt
+import sys
+from glob import glob
+
+import constants as c
+from utils import process_clip
+
+
+def process_training_data(num_clips):
+ """
+ Processes random training clips from the full training data. Saves to TRAIN_DIR_CLIPS by
+ default.
+
+ @param num_clips: The number of clips to process. Default = 5000000 (set in __main__).
+
+ @warning: This can take a couple of hours to complete with large numbers of clips.
+ """
+ num_prev_clips = len(glob(c.TRAIN_DIR_CLIPS + '*'))
+
+ for clip_num in xrange(num_prev_clips, num_clips + num_prev_clips):
+ clip = process_clip()
+
+ np.savez_compressed(c.TRAIN_DIR_CLIPS + str(clip_num), clip)
+
+ if (clip_num + 1) % 100 == 0: print 'Processed %d clips' % (clip_num + 1)
+
+
+def usage():
+ print 'Options:'
+ print '-n/--num_clips= <# clips to process for training>'
+ print '-t/--train_dir= <Directory of full training frames>'
+ print '-c/--clips_dir= <Save directory for processed clips>'
+ print " (I suggest making this a hidden dir so the filesystem doesn't freeze"
+ print " with so many files. DON'T `ls` THIS DIRECTORY!)"
+ print '-o/--overwrite (Overwrites the previous data in the training dir)'
+
+
+def main():
+ ##
+ # Handle command line input
+ ##
+
+ num_clips = 5000000
+
+ try:
+ opts, _ = getopt.getopt(sys.argv[1:], 'n:t:c:o',
+ ['num_clips=', 'train_dir=', 'clips_dir=', 'overwrite'])
+ except getopt.GetoptError:
+ usage()
+ sys.exit(2)
+
+ for opt, arg in opts:
+ if opt in ('-n', '--num_clips'):
+ num_clips = int(arg)
+ if opt in ('-t', '--train_dir'):
+ c.TRAIN_DIR = c.get_dir(arg)
+ if opt in ('-c', '--clips_dir'):
+ c.TRAIN_DIR_CLIPS = c.get_dir(arg)
+ if opt in ('-o', '--overwrite'):
+ c.clear_dir(c.TRAIN_DIR_CLIPS)
+
+ ##
+ # Process data for training
+ ##
+
+ process_training_data(num_clips)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/Code/tfutils.py b/Code/tfutils.py
new file mode 100644
index 0000000..22baf95
--- /dev/null
+++ b/Code/tfutils.py
@@ -0,0 +1,133 @@
+import tensorflow as tf
+import numpy as np
+
+
+def w(shape, stddev=0.01):
+ """
+ @return A weight layer with the given shape and standard deviation. Initialized with a
+ truncated normal distribution.
+ """
+ return tf.Variable(tf.truncated_normal(shape, stddev=stddev))
+
+
+def b(shape, const=0.1):
+ """
+ @return A bias layer with the given shape.
+ """
+ return tf.Variable(tf.constant(const, shape=shape))
+
+
+def conv_out_size(i, p, k, s):
+ """
+ Gets the output size for a 2D convolution. (Assumes square input and kernel).
+
+ @param i: The side length of the input.
+ @param p: The padding type (either 'SAME' or 'VALID').
+ @param k: The side length of the kernel.
+ @param s: The stride.
+
+ @type i: int
+ @type p: string
+ @type k: int
+ @type s: int
+
+ @return The side length of the output.
+ """
+ # convert p to a number
+ if p == 'SAME':
+ p = k // 2
+ elif p == 'VALID':
+ p = 0
+ else:
+ raise ValueError('p must be "SAME" or "VALID".')
+
+ return int(((i + (2 * p) - k) / s) + 1)
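+# Quick sanity check of the formula above:
+#   conv_out_size(32, 'SAME', 5, 1)  -> 32  # (32 + 2*2 - 5) / 1 + 1
+#   conv_out_size(32, 'VALID', 5, 1) -> 28  # (32 + 0 - 5) / 1 + 1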
+
+
+def log10(t):
+ """
+ Calculates the base-10 log of each element in t.
+
+ @param t: The tensor from which to calculate the base-10 log.
+
+ @return: A tensor with the base-10 log of each element in t.
+ """
+
+ numerator = tf.log(t)
+ denominator = tf.log(tf.constant(10, dtype=numerator.dtype))
+ return numerator / denominator
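+# e.g. log10(tf.constant(100.0)) evaluates to 2.0, since ln(100) / ln(10) == 2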
+
+
+def batch_pad_to_bounding_box(images, offset_height, offset_width, target_height, target_width):
+ """
+ Zero-pads a batch of images with the given dimensions.
+
+ @param images: 4-D tensor with shape [batch_size, height, width, channels]
+ @param offset_height: Number of rows of zeros to add on top.
+ @param offset_width: Number of columns of zeros to add on the left.
+ @param target_height: Height of output images.
+ @param target_width: Width of output images.
+
+ @return: The batch of images, all zero-padded with the specified dimensions.
+ """
+ batch_size, height, width, channels = tf.Session().run(tf.shape(images))
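+ # note: running a throwaway Session evaluates the shape eagerly, which is fine for
+ # these test utilities but would be wasteful inside a training loop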
+
+ if not offset_height >= 0:
+ raise ValueError('offset_height must be >= 0')
+ if not offset_width >= 0:
+ raise ValueError('offset_width must be >= 0')
+ if not target_height >= height + offset_height:
+ raise ValueError('target_height must be >= height + offset_height')
+ if not target_width >= width + offset_width:
+ raise ValueError('target_width must be >= width + offset_width')
+
+ num_tpad = offset_height
+ num_lpad = offset_width
+ num_bpad = target_height - (height + offset_height)
+ num_rpad = target_width - (width + offset_width)
+
+ tpad = np.zeros([batch_size, num_tpad, width, channels])
+ bpad = np.zeros([batch_size, num_bpad, width, channels])
+ lpad = np.zeros([batch_size, target_height, num_lpad, channels])
+ rpad = np.zeros([batch_size, target_height, num_rpad, channels])
+
+ padded = images
+ if num_tpad > 0 and num_bpad > 0: padded = tf.concat(1, [tpad, padded, bpad])
+ elif num_tpad > 0: padded = tf.concat(1, [tpad, padded])
+ elif num_bpad > 0: padded = tf.concat(1, [padded, bpad])
+ if num_lpad > 0 and num_rpad > 0: padded = tf.concat(2, [lpad, padded, rpad])
+ elif num_lpad > 0: padded = tf.concat(2, [lpad, padded])
+ elif num_rpad > 0: padded = tf.concat(2, [padded, rpad])
+
+ return padded
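+
+# Example usage (hypothetical shapes): centering a [4, 64, 64, 3] batch on a
+# 72x72 canvas with batch_pad_to_bounding_box(images, 4, 4, 72, 72).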
+
+
+def batch_crop_to_bounding_box(images, offset_height, offset_width, target_height, target_width):
+ """
+ Crops a batch of images to the given dimensions.
+
+ @param images: 4-D tensor with shape [batch, height, width, channels]
+ @param offset_height: Vertical coordinate of the top-left corner of the result in the input.
+ @param offset_width: Horizontal coordinate of the top-left corner of the result in the input.
+ @param target_height: Height of output images.
+ @param target_width: Width of output images.
+
+    @return: The batch of images, all cropped to the specified dimensions.
+ """
+    # as in batch_pad_to_bounding_box, read the (assumed fully-defined) static shape
+    batch_size, height, width, channels = images.get_shape().as_list()
+
+ if not offset_height >= 0:
+ raise ValueError('offset_height must be >= 0')
+ if not offset_width >= 0:
+ raise ValueError('offset_width must be >= 0')
+ if not target_height + offset_height <= height:
+ raise ValueError('target_height + offset_height must be <= height')
+    if not target_width + offset_width <= width:
+        raise ValueError('target_width + offset_width must be <= width')
+
+ top = offset_height
+ bottom = target_height + offset_height
+ left = offset_width
+ right = target_width + offset_width
+
+ return images[:, top:bottom, left:right, :]
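+
+# Example usage (hypothetical shapes): undoing the padding above with
+# batch_crop_to_bounding_box(padded, 4, 4, 64, 64).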
diff --git a/Code/tfutils_test.py b/Code/tfutils_test.py
new file mode 100644
index 0000000..4e2b490
--- /dev/null
+++ b/Code/tfutils_test.py
@@ -0,0 +1,102 @@
+import numpy as np
+import tensorflow as tf
+
+from tfutils import *
+
+imgs = tf.constant(np.ones([2, 2, 2, 3]))
+sess = tf.Session()
+
+
+# noinspection PyClassHasNoInit,PyMethodMayBeStatic
+class TestPad:
+ def test_rb(self):
+ res = sess.run(batch_pad_to_bounding_box(imgs, 0, 0, 4, 4))
+ assert np.array_equal(res, np.array([[[[1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]]
+ ],
+ [[[1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]]
+ ]], dtype=float))
+
+ def test_center(self):
+ res = sess.run(batch_pad_to_bounding_box(imgs, 1, 1, 4, 4))
+ assert np.array_equal(res, np.array([[[[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]]
+ ],
+ [[[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [1, 1, 1],
+ [1, 1, 1],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]]
+ ]], dtype=float))
+
+
+padded = batch_pad_to_bounding_box(imgs, 1, 1, 4, 4)
+
+
+# noinspection PyClassHasNoInit
+class TestCrop:
+ def test_rb(self):
+ res = sess.run(batch_crop_to_bounding_box(padded, 0, 0, 2, 2))
+ assert np.array_equal(res, np.array([[[[0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [1, 1, 1]]],
+ [[[0, 0, 0],
+ [0, 0, 0]],
+ [[0, 0, 0],
+ [1, 1, 1]]]]))
+
+ def test_center(self):
+ res = sess.run(batch_crop_to_bounding_box(padded, 1, 1, 2, 2))
+ assert np.array_equal(res, np.ones([2, 2, 2, 3]))
diff --git a/Code/utils.py b/Code/utils.py
new file mode 100644
index 0000000..2b97bdb
--- /dev/null
+++ b/Code/utils.py
@@ -0,0 +1,212 @@
+import tensorflow as tf
+import numpy as np
+from scipy.ndimage import imread
+from glob import glob
+
+import constants as c
+from tfutils import log10
+
+##
+# Data
+##
+
+def normalize_frames(frames):
+ """
+    Convert frames from uint8 [0, 255] to float32 [-1, 1].
+
+ @param frames: A numpy array. The frames to be converted.
+
+ @return: The normalized frames.
+ """
+ new_frames = frames.astype(np.float32)
+    # use float division; in Python 2, 255 / 2 truncates to 127
+    new_frames /= (255 / 2.0)
+ new_frames -= 1
+
+ return new_frames
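+
+# e.g. uint8 values map as 0 -> -1.0, 127 -> ~-0.004, 255 -> 1.0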
+
+
+def denormalize_frames(frames):
+ """
+ Performs the inverse operation of normalize_frames.
+
+ @param frames: A numpy array. The frames to be converted.
+
+ @return: The denormalized frames.
+ """
+ new_frames = frames + 1
+    new_frames *= (255 / 2.0)
+ # noinspection PyUnresolvedReferences
+ new_frames = new_frames.astype(np.uint8)
+
+ return new_frames
+
+def clip_l2_diff(clip):
+ """
+ @param clip: A numpy array of shape [c.TRAIN_HEIGHT, c.TRAIN_WIDTH, (3 * (c.HIST_LEN + 1))].
+    @return: The sum of squared pixel differences between each sequential pair of frames.
+ """
+ diff = 0
+ for i in xrange(c.HIST_LEN):
+ frame = clip[:, :, 3 * i:3 * (i + 1)]
+ next_frame = clip[:, :, 3 * (i + 1):3 * (i + 2)]
+ # noinspection PyTypeChecker
+ diff += np.sum(np.square(next_frame - frame))
+
+ return diff
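+
+# An equivalent vectorized form (a sketch, relying on the channel layout above):
+#   np.sum(np.square(np.diff(clip.reshape(clip.shape[0], clip.shape[1], -1, 3), axis=2)))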
+
+def get_full_clips(data_dir, num_clips, num_rec_out=1):
+ """
+ Loads a batch of random clips from the unprocessed train or test data.
+
+ @param data_dir: The directory of the data to read. Should be either c.TRAIN_DIR or c.TEST_DIR.
+ @param num_clips: The number of clips to read.
+ @param num_rec_out: The number of outputs to predict. Outputs > 1 are computed recursively,
+ using the previously-generated frames as input. Default = 1.
+
+ @return: An array of shape
+             [num_clips, c.TEST_HEIGHT, c.TEST_WIDTH, (3 * (c.HIST_LEN + num_rec_out))].
+ A batch of frame sequences with values normalized in range [-1, 1].
+ """
+ clips = np.empty([num_clips,
+ c.TEST_HEIGHT,
+ c.TEST_WIDTH,
+ (3 * (c.HIST_LEN + num_rec_out))])
+
+ # get num_clips random episodes
+ ep_dirs = np.random.choice(glob(data_dir + '*'), num_clips)
+
+    # get a random clip of length HIST_LEN + num_rec_out frames from each episode
+ for clip_num, ep_dir in enumerate(ep_dirs):
+ ep_frame_paths = glob(ep_dir + '/*')
+ start_index = np.random.choice(len(ep_frame_paths) - (c.HIST_LEN + num_rec_out - 1))
+ clip_frame_paths = ep_frame_paths[start_index:start_index + (c.HIST_LEN + num_rec_out)]
+
+ # read in frames
+ for frame_num, frame_path in enumerate(clip_frame_paths):
+ frame = imread(frame_path, mode='RGB')
+ norm_frame = normalize_frames(frame)
+
+ clips[clip_num, :, :, frame_num * 3:(frame_num + 1) * 3] = norm_frame
+
+ return clips
+
+def process_clip():
+ """
+ Gets a clip from the train dataset, cropped randomly to c.TRAIN_HEIGHT x c.TRAIN_WIDTH.
+
+ @return: An array of shape [c.TRAIN_HEIGHT, c.TRAIN_WIDTH, (3 * (c.HIST_LEN + 1))].
+ A frame sequence with values normalized in range [-1, 1].
+ """
+ clip = get_full_clips(c.TRAIN_DIR, 1)[0]
+
+ # Randomly crop the clip. With 0.05 probability, take the first crop offered, otherwise,
+ # repeat until we have a clip with movement in it.
+ take_first = np.random.choice(2, p=[0.95, 0.05])
+ cropped_clip = np.empty([c.TRAIN_HEIGHT, c.TRAIN_WIDTH, 3 * (c.HIST_LEN + 1)])
+ for i in xrange(100): # cap at 100 trials in case the clip has no movement anywhere
+ crop_x = np.random.choice(c.TEST_WIDTH - c.TRAIN_WIDTH + 1)
+ crop_y = np.random.choice(c.TEST_HEIGHT - c.TRAIN_HEIGHT + 1)
+ cropped_clip = clip[crop_y:crop_y + c.TRAIN_HEIGHT, crop_x:crop_x + c.TRAIN_WIDTH, :]
+
+ if take_first or clip_l2_diff(cropped_clip) > c.MOVEMENT_THRESHOLD:
+ break
+
+ return cropped_clip
+
+def get_train_batch():
+ """
+ Loads c.BATCH_SIZE clips from the database of preprocessed training clips.
+
+ @return: An array of shape
+ [c.BATCH_SIZE, c.TRAIN_HEIGHT, c.TRAIN_WIDTH, (3 * (c.HIST_LEN + 1))].
+ """
+ clips = np.empty([c.BATCH_SIZE, c.TRAIN_HEIGHT, c.TRAIN_WIDTH, (3 * (c.HIST_LEN + 1))],
+ dtype=np.float32)
+ for i in xrange(c.BATCH_SIZE):
+ path = c.TRAIN_DIR_CLIPS + str(np.random.choice(c.NUM_CLIPS)) + '.npz'
+ clip = np.load(path)['arr_0']
+
+ clips[i] = clip
+
+ return clips
+
+
+def get_test_batch(test_batch_size, num_rec_out=1):
+ """
+ Gets a clip from the test dataset.
+
+ @param test_batch_size: The number of clips.
+ @param num_rec_out: The number of outputs to predict. Outputs > 1 are computed recursively,
+ using the previously-generated frames as input. Default = 1.
+
+ @return: An array of shape:
+ [test_batch_size, c.TEST_HEIGHT, c.TEST_WIDTH, (3 * (c.HIST_LEN + num_rec_out))].
+ A batch of frame sequences with values normalized in range [-1, 1].
+ """
+ return get_full_clips(c.TEST_DIR, test_batch_size, num_rec_out=num_rec_out)
+
+
+##
+# Error calculation
+##
+
+# TODO: Add SSIM error http://www.cns.nyu.edu/pub/eero/wang03-reprint.pdf
+# TODO: Unit test error functions.
+
+def psnr_error(gen_frames, gt_frames):
+ """
+ Computes the Peak Signal to Noise Ratio error between the generated images and the ground
+ truth images.
+
+ @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the
+ generator model.
+ @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for
+ each frame in gen_frames.
+
+    @return: A scalar tensor. The Peak Signal to Noise Ratio error, averaged over the frames
+                              in the batch.
+ """
+ shape = tf.shape(gen_frames)
+ num_pixels = tf.to_float(shape[1] * shape[2])
+ square_diff = tf.square(gt_frames - gen_frames)
+
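+    # PSNR = 10 * log10(MAX^2 / MSE), with the peak value MAX taken to be 1;
+    # the squared error is summed over channels and averaged over pixels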
+ batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(square_diff, [1, 2, 3])))
+ return tf.reduce_mean(batch_errors)
+
+def sharp_diff_error(gen_frames, gt_frames):
+ """
+ Computes the Sharpness Difference error between the generated images and the ground truth
+ images.
+
+ @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the
+ generator model.
+ @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for
+ each frame in gen_frames.
+
+    @return: A scalar tensor. The Sharpness Difference error, averaged over the frames in the
+                              batch.
+ """
+ shape = tf.shape(gen_frames)
+ num_pixels = tf.to_float(shape[1] * shape[2])
+
+    # gradient difference
+    # create a horizontal [-1, 1] filter and a vertical [[1], [-1]] filter; the sign of the
+    # difference doesn't matter since absolute values are taken below.
+    # TODO: Could this be simplified with one filter [[-1, 2], [0, -1]]?
+ pos = tf.constant(np.identity(3), dtype=tf.float32)
+ neg = -1 * pos
+ filter_x = tf.expand_dims(tf.pack([neg, pos]), 0) # [-1, 1]
+ filter_y = tf.pack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)]) # [[1],[-1]]
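+    # filter_x has shape [1, 2, 3, 3] and filter_y [2, 1, 3, 3]; the 3x3 identity
+    # blocks apply the difference to each RGB channel independently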
+ strides = [1, 1, 1, 1] # stride of (1, 1)
+ padding = 'SAME'
+
+ gen_dx = tf.abs(tf.nn.conv2d(gen_frames, filter_x, strides, padding=padding))
+ gen_dy = tf.abs(tf.nn.conv2d(gen_frames, filter_y, strides, padding=padding))
+ gt_dx = tf.abs(tf.nn.conv2d(gt_frames, filter_x, strides, padding=padding))
+ gt_dy = tf.abs(tf.nn.conv2d(gt_frames, filter_y, strides, padding=padding))
+
+ gen_grad_sum = gen_dx + gen_dy
+ gt_grad_sum = gt_dx + gt_dy
+
+ grad_diff = tf.abs(gt_grad_sum - gen_grad_sum)
+
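+    # report the mean absolute gradient difference on a PSNR-style log scale
+    # (higher is better), mirroring psnr_error above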
+ batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(grad_diff, [1, 2, 3])))
+ return tf.reduce_mean(batch_errors)