Diffstat (limited to 'Code/d_scale_model.py')
-rw-r--r--  Code/d_scale_model.py  153
1 file changed, 153 insertions, 0 deletions
diff --git a/Code/d_scale_model.py b/Code/d_scale_model.py
new file mode 100644
index 0000000..766e01a
--- /dev/null
+++ b/Code/d_scale_model.py
@@ -0,0 +1,153 @@
+import tensorflow as tf
+from tfutils import w, b, conv_out_size
+import constants as c
+
+
+# noinspection PyShadowingNames
+class DScaleModel:
+    """
+    A DScaleModel is a network that takes a single video frame as input and attempts to
+    discriminate whether that frame is a real-world image or one generated by a generator
+    network. A DiscriminatorModel uses multiple of these together to make predictions on
+    frames at increasing scales.
+    """
+
+    def __init__(self, scale_index, height, width, conv_layer_fms, kernel_sizes, fc_layer_sizes):
+        """
+        Initializes the DScaleModel.
+
+        @param scale_index: The index of this scale network in the DiscriminatorModel.
+        @param height: The height of the input images.
+        @param width: The width of the input images.
+        @param conv_layer_fms: The number of output feature maps for each convolution.
+        @param kernel_sizes: The size of the kernel for each convolutional layer.
+        @param fc_layer_sizes: The number of nodes in each fully-connected layer.
+
+        @type scale_index: int
+        @type height: int
+        @type width: int
+        @type conv_layer_fms: list<int>
+        @type kernel_sizes: list<int> (len = len(conv_layer_fms) - 1)
+        @type fc_layer_sizes: list<int>
+        """
+        assert len(kernel_sizes) == len(conv_layer_fms) - 1, \
+            'len(kernel_sizes) must = len(conv_layer_fms) - 1'
+
+        self.scale_index = scale_index
+        self.height = height
+        self.width = width
+        self.conv_layer_fms = conv_layer_fms
+        self.kernel_sizes = kernel_sizes
+        self.fc_layer_sizes = fc_layer_sizes
+
+        self.define_graph()
+
+    # noinspection PyAttributeOutsideInit
+    def define_graph(self):
+        """
+        Sets up the model graph in TensorFlow.
+        """
+
+        ##
+        # Input data
+        ##
+        with tf.name_scope('input'):
+            self.input_frames = tf.placeholder(
+                tf.float32, shape=[None, self.height, self.width, self.conv_layer_fms[0]])
+
+            # use variable batch_size for more flexibility
+            self.batch_size = tf.shape(self.input_frames)[0]
+
+        ##
+        # Layer setup
+        ##
+
+        with tf.name_scope('setup'):
+            # convolution
+            with tf.name_scope('convolutions'):
+                conv_ws = []
+                conv_bs = []
+                last_out_height = self.height
+                last_out_width = self.width
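+                # Each filter tensor has shape [kernel_h, kernel_w, in_fms, out_fms],
+                # the layout tf.nn.conv2d expects.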
+                for i in xrange(len(self.kernel_sizes)):
+                    conv_ws.append(w([self.kernel_sizes[i],
+                                      self.kernel_sizes[i],
+                                      self.conv_layer_fms[i],
+                                      self.conv_layer_fms[i + 1]]))
+                    conv_bs.append(b([self.conv_layer_fms[i + 1]]))
+
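+                    # Track the spatial size after each convolution so the input
+                    # size of the first fully-connected layer can be computed.
+                    # (Assuming conv_out_size follows standard TF arithmetic:
+                    # unchanged for 'SAME' padding at stride 1, shrinking by
+                    # kernel_size - 1 for 'VALID'.)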
+                    last_out_height = conv_out_size(
+                        last_out_height, c.PADDING_D, self.kernel_sizes[i], 1)
+                    last_out_width = conv_out_size(
+                        last_out_width, c.PADDING_D, self.kernel_sizes[i], 1)
+
+            # fully-connected
+            with tf.name_scope('fully-connected'):
+                # Add an initial size to bridge from the last conv layer to the first
+                # fully-connected layer. Divide height and width by 2 because of the
+                # 2x2 pooling layer applied after the convolutions.
+                self.fc_layer_sizes.insert(
+                    0, (last_out_height / 2) * (last_out_width / 2) * self.conv_layer_fms[-1])
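+                # For example (illustrative numbers only): a 32x32 input kept at
+                # 32x32 by the convs, with conv_layer_fms[-1] = 128, would give
+                # (32 / 2) * (32 / 2) * 128 = 32768 here.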
+
+                fc_ws = []
+                fc_bs = []
+                for i in xrange(len(self.fc_layer_sizes) - 1):
+                    fc_ws.append(w([self.fc_layer_sizes[i],
+                                    self.fc_layer_sizes[i + 1]]))
+                    fc_bs.append(b([self.fc_layer_sizes[i + 1]]))
+
+        ##
+        # Forward pass calculation
+        ##
+
+        def generate_predictions():
+            """
+            Runs self.input_frames through the network to generate predictions in
+            the range [0, 1], where 0 means a generated image and 1 means a real image.
+
+            @return: A tensor of predictions of shape [batch_size, 1].
+            """
+            with tf.name_scope('calculation'):
+                preds = tf.zeros([self.batch_size, 1])
+                last_input = self.input_frames
+
+                # convolutions
+                with tf.name_scope('convolutions'):
+                    for i in xrange(len(conv_ws)):
+                        # Convolve with stride 1 and activate with ReLU
+                        preds = tf.nn.conv2d(
+                            last_input, conv_ws[i], [1, 1, 1, 1], padding=c.PADDING_D)
+                        preds = tf.nn.relu(preds + conv_bs[i])
+
+                        last_input = preds
+
+                # pooling layer
+                with tf.name_scope('pooling'):
+                    preds = tf.nn.max_pool(preds, [1, 2, 2, 1], [1, 2, 2, 1], padding=c.PADDING_D)
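+                    # The 2x2 window with stride 2 halves height and width, matching
+                    # the /2 used when sizing the first fully-connected layer above.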
+
+                # flatten preds for the fully-connected layers
+                shape = preds.get_shape().as_list()
+                # -1 lets TensorFlow infer the batch dimension dynamically
+                preds = tf.reshape(preds, [-1, shape[1] * shape[2] * shape[3]])
+
+                # fully-connected layers
+                with tf.name_scope('fully-connected'):
+                    for i in xrange(len(fc_ws)):
+                        preds = tf.matmul(preds, fc_ws[i]) + fc_bs[i]
+
+                        # Activate with ReLU, or sigmoid for the last layer
+                        if i == len(fc_ws) - 1:
+                            preds = tf.sigmoid(preds)
+                        else:
+                            preds = tf.nn.relu(preds)
+
+                # Clip preds to [0.1, 0.9] for stability (keeps the log terms of a
+                # cross-entropy loss away from log(0))
+                with tf.name_scope('clip'):
+                    preds = tf.clip_by_value(preds, 0.1, 0.9)
+
+            return preds
+
+        self.preds = generate_predictions()
+
+        ##
+        # Training handled by DiscriminatorModel
+        ##
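
A minimal usage sketch for the class above, assuming TensorFlow 1.x. The frame size,
feature-map counts, and fully-connected widths below are illustrative values, not values
taken from this repository:

    import numpy as np
    import tensorflow as tf
    from d_scale_model import DScaleModel

    # Illustrative: one discriminator scale over 32x32 RGB frames.
    model = DScaleModel(scale_index=0,
                        height=32,
                        width=32,
                        conv_layer_fms=[3, 64, 128],   # input channels, then conv outputs
                        kernel_sizes=[5, 5],           # len = len(conv_layer_fms) - 1
                        fc_layer_sizes=[512, 256, 1])  # last layer outputs one score

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        frames = np.random.uniform(-1, 1, size=[8, 32, 32, 3]).astype(np.float32)
        preds = sess.run(model.preds, feed_dict={model.input_frames: frames})
        print(preds.shape)  # (8, 1), values clipped to [0.1, 0.9]

Note that __init__ keeps a reference to fc_layer_sizes and define_graph inserts the
flattened conv output size at index 0, so the caller's list is mutated; pass a fresh
list for each instance.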