author     StevenLiuWen <liuwen@shanghaitech.edu.cn>  2018-03-13 03:28:06 -0400
committer  StevenLiuWen <liuwen@shanghaitech.edu.cn>  2018-03-13 03:28:06 -0400
commit     fede6ca1dd0077ff509d84bd24028cc7a93bb119 (patch)
tree       af7f6e759b5dec4fc2964daed09e903958b919ed /Codes/flownet2/src
first commit
Diffstat (limited to 'Codes/flownet2/src')
-rw-r--r--  Codes/flownet2/src/__init__.py  0
-rw-r--r--  Codes/flownet2/src/correlation.py  35
-rw-r--r--  Codes/flownet2/src/dataloader.py  329
-rw-r--r--  Codes/flownet2/src/dataset_configs.py  153
-rw-r--r--  Codes/flownet2/src/downsample.py  8
-rw-r--r--  Codes/flownet2/src/flow_warp.py  15
-rw-r--r--  Codes/flownet2/src/flowlib.py  554
-rw-r--r--  Codes/flownet2/src/flownet2/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet2/flownet2.py  118
-rw-r--r--  Codes/flownet2/src/flownet2/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet2/train.py  24
-rw-r--r--  Codes/flownet2/src/flownet_c/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_c/flownet_c.py  167
-rw-r--r--  Codes/flownet2/src/flownet_c/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_c/train.py  19
-rw-r--r--  Codes/flownet2/src/flownet_cs/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_cs/flownet_cs.py  41
-rw-r--r--  Codes/flownet2/src/flownet_cs/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_cs/train.py  21
-rw-r--r--  Codes/flownet2/src/flownet_css/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_css/flownet_css.py  41
-rw-r--r--  Codes/flownet2/src/flownet_css/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_css/train.py  22
-rw-r--r--  Codes/flownet2/src/flownet_s/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_s/flownet_s.py  161
-rw-r--r--  Codes/flownet2/src/flownet_s/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_s/train.py  19
-rw-r--r--  Codes/flownet2/src/flownet_sd/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_sd/flownet_sd.py  160
-rw-r--r--  Codes/flownet2/src/flownet_sd/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_sd/train.py  19
-rw-r--r--  Codes/flownet2/src/net.py  177
-rw-r--r--  Codes/flownet2/src/ops/build/.gitkeep  0
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc  160
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc  262
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_kernel.cc  137
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc  153
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_kernel.h  77
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_op.cc  83
-rw-r--r--  Codes/flownet2/src/ops/correlation/pad.cu.cc  76
-rw-r--r--  Codes/flownet2/src/ops/correlation/pad.h  20
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_kernel.cc  47
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_kernel.h  18
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc  108
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_op.cc  30
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp.cc  48
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc  130
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp.h  28
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc  57
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc  126
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc  23
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc  420
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h  228
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc  461
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc  348
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h  22
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc  129
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h  19
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc  95
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/preprocessing.cc  96
-rw-r--r--  Codes/flownet2/src/training_schedules.py  12
-rw-r--r--  Codes/flownet2/src/utils.py  46
62 files changed, 5848 insertions, 0 deletions
diff --git a/Codes/flownet2/src/__init__.py b/Codes/flownet2/src/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/__init__.py
diff --git a/Codes/flownet2/src/correlation.py b/Codes/flownet2/src/correlation.py
new file mode 100644
index 0000000..60a5c37
--- /dev/null
+++ b/Codes/flownet2/src/correlation.py
@@ -0,0 +1,35 @@
+import tensorflow as tf
+
+_correlation_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/correlation.so"))
+
+
+def correlation(input_a, input_b, kernel_size, max_displacement, stride_1, stride_2, padding):
+ return _correlation_ops.correlation(input_a,
+ input_b,
+ kernel_size,
+ max_displacement,
+ stride_1,
+ stride_2,
+ padding)
+
+
+@tf.RegisterGradient("Correlation")
+def _correlation_grad(corr_op, gradients):
+ kernel_size = corr_op.get_attr("kernel_size")
+ max_displacement = corr_op.get_attr("max_displacement")
+ stride_1 = corr_op.get_attr("stride_1")
+ stride_2 = corr_op.get_attr("stride_2")
+ pad = corr_op.get_attr("pad")
+
+ corr_grads = _correlation_ops.correlation_grad(gradients,
+ corr_op.inputs[0],
+ corr_op.inputs[1],
+ kernel_size,
+ max_displacement,
+ stride_1,
+ stride_2,
+ pad)
+
+ # Return the gradients with respect to input_a and input_b
+ return corr_grads.backprops_a, corr_grads.backprops_b
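A minimal usage sketch for the custom correlation op above (not part of this diff; the import path and tensor shapes are assumptions, and the compiled ops/build/correlation.so must be present):

    import tensorflow as tf
    from src.correlation import correlation

    # two feature maps of shape [batch, height, width, channels]
    feat_a = tf.placeholder(tf.float32, [8, 48, 64, 256])
    feat_b = tf.placeholder(tf.float32, [8, 48, 64, 256])

    # same hyper-parameters FlowNetC uses later in this commit:
    # kernel_size=1, max_displacement=20, stride_1=1, stride_2=2, padding=20
    cc = correlation(feat_a, feat_b, 1, 20, 1, 2, 20)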
diff --git a/Codes/flownet2/src/dataloader.py b/Codes/flownet2/src/dataloader.py
new file mode 100644
index 0000000..22a6ddb
--- /dev/null
+++ b/Codes/flownet2/src/dataloader.py
@@ -0,0 +1,329 @@
+# -*- coding: utf-8 -*-
+import tensorflow as tf
+import copy
+slim = tf.contrib.slim
+
+_preprocessing_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/preprocessing.so"))
+
+
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
+class Image(slim.tfexample_decoder.ItemHandler):
+ """An ItemHandler that decodes a parsed Tensor as an image."""
+
+ def __init__(self,
+ image_key=None,
+ format_key=None,
+ shape=None,
+ channels=3,
+ dtype=tf.uint8,
+ repeated=False):
+ """Initializes the image.
+ Args:
+ image_key: the name of the TF-Example feature in which the encoded image
+ is stored.
+ shape: the output shape of the image as 1-D `Tensor`
+ [height, width, channels]. If provided, the image is reshaped
+ accordingly. If left as None, no reshaping is done. A shape should
+ be supplied only if all the stored images have the same shape.
+ channels: the number of channels in the image.
+ dtype: images will be decoded at this bit depth. Different formats
+ support different bit depths.
+ See tf.image.decode_image,
+ tf.decode_raw,
+ repeated: if False, decodes a single image. If True, decodes a
+ variable number of image strings from a 1D tensor of strings.
+ """
+ if not image_key:
+ image_key = 'image/encoded'
+
+ super(Image, self).__init__([image_key])
+ self._image_key = image_key
+ self._shape = shape
+ self._channels = channels
+ self._dtype = dtype
+ self._repeated = repeated
+
+ def tensors_to_item(self, keys_to_tensors):
+ """See base class."""
+ image_buffer = keys_to_tensors[self._image_key]
+
+ if self._repeated:
+ return tf.map_fn(lambda x: self._decode(x),
+ image_buffer, dtype=self._dtype)
+ else:
+ return self._decode(image_buffer)
+
+ def _decode(self, image_buffer):
+ """Decodes the image buffer.
+ Args:
+ image_buffer: The tensor representing the encoded image tensor.
+ Returns:
+ A tensor that represents decoded image of self._shape, or
+ (?, ?, self._channels) if self._shape is not specified.
+ """
+ def decode_raw():
+ """Decodes a raw image."""
+ return tf.decode_raw(image_buffer, out_type=self._dtype)
+
+ image = decode_raw()
+ # image.set_shape([None, None, self._channels])
+ if self._shape is not None:
+ image = tf.reshape(image, self._shape)
+
+ return image
+
+
+def __get_dataset(dataset_config, split_name):
+ """
+ dataset_config: A dataset_config defined in datasets.py
+ split_name: 'train'/'validate'
+ """
+ with tf.name_scope('__get_dataset'):
+ if split_name not in dataset_config['SIZES']:
+ raise ValueError('split name %s not recognized' % split_name)
+
+ IMAGE_HEIGHT, IMAGE_WIDTH = dataset_config['IMAGE_HEIGHT'], dataset_config['IMAGE_WIDTH']
+ reader = tf.TFRecordReader
+ keys_to_features = {
+ 'image_a': tf.FixedLenFeature((), tf.string),
+ 'image_b': tf.FixedLenFeature((), tf.string),
+ 'flow': tf.FixedLenFeature((), tf.string),
+ }
+ items_to_handlers = {
+ 'image_a': Image(
+ image_key='image_a',
+ dtype=tf.float64,
+ shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3],
+ channels=3),
+ 'image_b': Image(
+ image_key='image_b',
+ dtype=tf.float64,
+ shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3],
+ channels=3),
+ 'flow': Image(
+ image_key='flow',
+ dtype=tf.float32,
+ shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 2],
+ channels=2),
+ }
+ decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
+ return slim.dataset.Dataset(
+ data_sources=dataset_config['PATHS'][split_name],
+ reader=reader,
+ decoder=decoder,
+ num_samples=dataset_config['SIZES'][split_name],
+ items_to_descriptions=dataset_config['ITEMS_TO_DESCRIPTIONS'])
+
+
+def config_to_arrays(dataset_config):
+ output = {
+ 'name': [],
+ 'rand_type': [],
+ 'exp': [],
+ 'mean': [],
+ 'spread': [],
+ 'prob': [],
+ 'coeff_schedule': [],
+ }
+ config = copy.deepcopy(dataset_config)
+
+ if 'coeff_schedule_param' in config:
+ del config['coeff_schedule_param']
+
+ # Get all attributes
+ for (name, value) in config.items():
+ if name == 'coeff_schedule_param':
+ output['coeff_schedule'] = [value['half_life'],
+ value['initial_coeff'],
+ value['final_coeff']]
+ else:
+ output['name'].append(name)
+ output['rand_type'].append(value['rand_type'])
+ output['exp'].append(value['exp'])
+ output['mean'].append(value['mean'])
+ output['spread'].append(value['spread'])
+ output['prob'].append(value['prob'])
+
+ return output
+
+
+# https://github.com/tgebru/transform/blob/master/src/caffe/layers/data_augmentation_layer.cpp#L34
+def _generate_coeff(param, discount_coeff=tf.constant(1.0), default_value=tf.constant(0.0)):
+ if not all(name in param for name in ['rand_type', 'exp', 'mean', 'spread', 'prob']):
+ raise RuntimeError('Expected rand_type, exp, mean, spread, prob in `param`')
+
+ rand_type = param['rand_type']
+ exp = float(param['exp'])
+ mean = tf.convert_to_tensor(param['mean'], dtype=tf.float32)
+ spread = float(param['spread']) # AKA standard deviation
+ prob = float(param['prob'])
+
+ # Multiply spread by our discount_coeff so it changes over time
+ spread = spread * discount_coeff
+
+ if rand_type == 'uniform':
+ value = tf.cond(spread > 0.0,
+ lambda: tf.random_uniform([], mean - spread, mean + spread),
+ lambda: mean)
+ if exp:
+ value = tf.exp(value)
+ elif rand_type == 'gaussian':
+ value = tf.cond(spread > 0.0,
+ lambda: tf.random_normal([], mean, spread),
+ lambda: mean)
+ if exp:
+ value = tf.exp(value)
+ elif rand_type == 'bernoulli':
+ if prob > 0.0:
+ value = tf.contrib.distributions.Bernoulli(probs=prob).sample([])
+ else:
+ value = 0.0
+ elif rand_type == 'uniform_bernoulli':
+ tmp1 = 0.0
+ tmp2 = 0
+ if prob > 0.0:
+ tmp2 = tf.contrib.distributions.Bernoulli(probs=prob).sample([])
+ else:
+ tmp2 = 0
+
+ if tmp2 == 0:
+ if default_value is not None:
+ return default_value
+ else:
+ tmp1 = tf.cond(spread > 0.0,
+ lambda: tf.random_uniform([], mean - spread, mean + spread),
+ lambda: mean)
+ if exp:
+ tmp1 = tf.exp(tmp1)
+ value = tmp1
+ elif rand_type == 'gaussian_bernoulli':
+ tmp1 = 0.0
+ tmp2 = 0
+ if prob > 0.0:
+ tmp2 = tf.contrib.distributions.Bernoulli(probs=prob).sample([])
+ else:
+ tmp2 = 0
+
+ if tmp2 == 0:
+ if default_value is not None:
+ return default_value
+ else:
+ tmp1 = tf.cond(spread > 0.0,
+ lambda: tf.random_normal([], mean, spread),
+ lambda: mean)
+ if exp:
+ tmp1 = tf.exp(tmp1)
+ value = tmp1
+ else:
+ raise ValueError('Unknown distribution type %s.' % rand_type)
+ return value
+
+
+def load_batch(dataset_config, split_name, global_step):
+ num_threads = 32
+ reader_kwargs = {'options': tf.python_io.TFRecordOptions(
+ tf.python_io.TFRecordCompressionType.ZLIB)}
+
+ with tf.name_scope('load_batch'):
+ dataset = __get_dataset(dataset_config, split_name)
+ data_provider = slim.dataset_data_provider.DatasetDataProvider(
+ dataset,
+ num_readers=num_threads,
+ common_queue_capacity=2048,
+ common_queue_min=1024,
+ reader_kwargs=reader_kwargs)
+ image_a, image_b, flow = data_provider.get(['image_a', 'image_b', 'flow'])
+ image_a, image_b, flow = map(tf.to_float, [image_a, image_b, flow])
+
+ if dataset_config['PREPROCESS']['scale']:
+ image_a = image_a / 255.0
+ image_b = image_b / 255.0
+
+ crop = [dataset_config['PREPROCESS']['crop_height'],
+ dataset_config['PREPROCESS']['crop_width']]
+ config_a = config_to_arrays(dataset_config['PREPROCESS']['image_a'])
+ config_b = config_to_arrays(dataset_config['PREPROCESS']['image_b'])
+
+ image_as, image_bs, flows = map(lambda x: tf.expand_dims(x, 0), [image_a, image_b, flow])
+
+ # Perform data augmentation (the custom preprocessing op is pinned to the CPU below)
+ with tf.device('/cpu:0'):
+ image_as, image_bs, transforms_from_a, transforms_from_b = \
+ _preprocessing_ops.data_augmentation(image_as,
+ image_bs,
+ global_step,
+ crop,
+ config_a['name'],
+ config_a['rand_type'],
+ config_a['exp'],
+ config_a['mean'],
+ config_a['spread'],
+ config_a['prob'],
+ config_a['coeff_schedule'],
+ config_b['name'],
+ config_b['rand_type'],
+ config_b['exp'],
+ config_b['mean'],
+ config_b['spread'],
+ config_b['prob'],
+ config_b['coeff_schedule'])
+
+ noise_coeff_a = None
+ noise_coeff_b = None
+
+ # Generate and apply noise coeff for A if defined in A params
+ if 'noise' in dataset_config['PREPROCESS']['image_a']:
+ discount_coeff = tf.constant(1.0)
+ if 'coeff_schedule_param' in dataset_config['PREPROCESS']['image_a']:
+ initial_coeff = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['initial_coeff']
+ final_coeff = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['final_coeff']
+ half_life = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['half_life']
+ discount_coeff = initial_coeff + \
+ (final_coeff - initial_coeff) * \
+ (2.0 / (1.0 + tf.exp(-1.0986 * tf.to_float(global_step) / half_life)) - 1.0)
+
+ noise_coeff_a = _generate_coeff(
+ dataset_config['PREPROCESS']['image_a']['noise'], discount_coeff)
+ noise_a = tf.random_normal(shape=tf.shape(image_as),
+ mean=0.0, stddev=noise_coeff_a,
+ dtype=tf.float32)
+ image_as = tf.clip_by_value(image_as + noise_a, 0.0, 1.0)
+
+ # Generate noise coeff for B if defined in B params
+ if 'noise' in dataset_config['PREPROCESS']['image_b']:
+ discount_coeff = tf.constant(1.0)
+ if 'coeff_schedule_param' in dataset_config['PREPROCESS']['image_b']:
+ initial_coeff = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['initial_coeff']
+ final_coeff = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['final_coeff']
+ half_life = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['half_life']
+ discount_coeff = initial_coeff + \
+ (final_coeff - initial_coeff) * \
+ (2.0 / (1.0 + tf.exp(-1.0986 * tf.to_float(global_step) / half_life)) - 1.0)
+ noise_coeff_b = _generate_coeff(
+ dataset_config['PREPROCESS']['image_b']['noise'], discount_coeff)
+
+ # Combine coeff from a with coeff from b
+ if noise_coeff_a is not None:
+ if noise_coeff_b is not None:
+ noise_coeff_b = noise_coeff_a * noise_coeff_b
+ else:
+ noise_coeff_b = noise_coeff_a
+
+ # Add noise to B if needed
+ if noise_coeff_b is not None:
+ noise_b = tf.random_normal(shape=tf.shape(image_bs),
+ mean=0.0, stddev=noise_coeff_b,
+ dtype=tf.float32)
+ image_bs = tf.clip_by_value(image_bs + noise_b, 0.0, 1.0)
+
+ # Perform flow augmentation using spatial parameters from data augmentation
+ flows = _preprocessing_ops.flow_augmentation(
+ flows, transforms_from_a, transforms_from_b, crop)
+
+ return tf.train.batch([image_as, image_bs, flows],
+ enqueue_many=True,
+ batch_size=dataset_config['BATCH_SIZE'],
+ capacity=dataset_config['BATCH_SIZE'] * 4,
+ num_threads=num_threads,
+ allow_smaller_final_batch=False)
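A minimal consumption sketch for load_batch above, assuming the FLYING_CHAIRS_DATASET_CONFIG defined in the next file, a TF 1.x queue-runner session, and that the custom ops under ops/build/ have been compiled:

    import tensorflow as tf
    from src.dataloader import load_batch
    from src.dataset_configs import FLYING_CHAIRS_DATASET_CONFIG

    global_step = tf.train.get_or_create_global_step()
    image_as, image_bs, flows = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', global_step)

    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        a, b, f = sess.run([image_as, image_bs, flows])  # one augmented batch: images A, B and flow
        coord.request_stop()
        coord.join(threads)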
diff --git a/Codes/flownet2/src/dataset_configs.py b/Codes/flownet2/src/dataset_configs.py
new file mode 100644
index 0000000..fbda5d0
--- /dev/null
+++ b/Codes/flownet2/src/dataset_configs.py
@@ -0,0 +1,153 @@
+"""
+Add dataset configurations here. Each dataset must have the following structure:
+
+NAME = {
+ IMAGE_HEIGHT: int,
+ IMAGE_WIDTH: int,
+ ITEMS_TO_DESCRIPTIONS: {
+ 'image_a': 'A 3-channel image.',
+ 'image_b': 'A 3-channel image.',
+ 'flow': 'A 2-channel optical flow field',
+ },
+ SIZES: {
+ 'train': int,
+ 'validate': int, (optional)
+ ...
+ },
+ BATCH_SIZE: int,
+ PATHS: {
+ 'train': '',
+ 'validate': '', (optional)
+ ...
+ }
+}
+"""
+
+"""
+note that one step = one batch of data processed, ~not~ an entire epoch
+'coeff_schedule_param': {
+ 'half_life': 50000, after this many steps, the value will be i + (f - i)/2
+ 'initial_coeff': 0.5, initial value
+ 'final_coeff': 1, final value
+},
+"""
+
+FLYING_CHAIRS_DATASET_CONFIG = {
+ 'IMAGE_HEIGHT': 384,
+ 'IMAGE_WIDTH': 512,
+ 'ITEMS_TO_DESCRIPTIONS': {
+ 'image_a': 'A 3-channel image.',
+ 'image_b': 'A 3-channel image.',
+ 'flow': 'A 2-channel optical flow field',
+ },
+ 'SIZES': {
+ 'train': 22232,
+ 'validate': 640,
+ 'sample': 8,
+ },
+ 'BATCH_SIZE': 8,
+ 'PATHS': {
+ 'train': './data/tfrecords/fc_train.tfrecords',
+ 'validate': './data/tfrecords/fc_val.tfrecords',
+ 'sample': './data/tfrecords/fc_sample.tfrecords',
+ },
+ 'PREPROCESS': {
+ 'scale': False,
+ 'crop_height': 320,
+ 'crop_width': 448,
+ 'image_a': {
+ 'translate': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.4,
+ 'prob': 1.0,
+ },
+ 'rotate': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.4,
+ 'prob': 1.0,
+ },
+ 'zoom': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': True,
+ 'mean': 0.2,
+ 'spread': 0.4,
+ 'prob': 1.0,
+ },
+ 'squeeze': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.3,
+ 'prob': 1.0,
+ },
+ 'noise': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': False,
+ 'mean': 0.03,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ },
+ # All preprocessing to image A will be applied to image B in addition to the following.
+ 'image_b': {
+ 'translate': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ 'rotate': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ 'zoom': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ 'gamma': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'brightness': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'contrast': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'color': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'coeff_schedule_param': {
+ 'half_life': 50000,
+ 'initial_coeff': 0.5,
+ 'final_coeff': 1,
+ },
+ }
+ },
+}
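The coeff_schedule_param block above drives the augmentation discount coefficient computed in dataloader.py; a small plain-Python sketch of that schedule, using the values from this config:

    import math

    def discount(step, half_life=50000, initial_coeff=0.5, final_coeff=1.0):
        # sigmoid-shaped ramp: exactly halfway between the initial and final
        # coefficients after `half_life` steps (exp(-1.0986) is ~1/3)
        return initial_coeff + (final_coeff - initial_coeff) * \
            (2.0 / (1.0 + math.exp(-1.0986 * step / half_life)) - 1.0)

    print(discount(0))       # 0.5   -> initial_coeff
    print(discount(50000))   # ~0.75 -> halfway point after one half_life
    print(discount(500000))  # ~1.0  -> approaches final_coeff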
diff --git a/Codes/flownet2/src/downsample.py b/Codes/flownet2/src/downsample.py
new file mode 100644
index 0000000..5e6fc95
--- /dev/null
+++ b/Codes/flownet2/src/downsample.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+
+_downsample = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/downsample.so"))
+
+
+def downsample(tensor, size):
+ return _downsample.downsample(tensor, size)
diff --git a/Codes/flownet2/src/flow_warp.py b/Codes/flownet2/src/flow_warp.py
new file mode 100644
index 0000000..fe5fd4d
--- /dev/null
+++ b/Codes/flownet2/src/flow_warp.py
@@ -0,0 +1,15 @@
+import tensorflow as tf
+
+_flow_warp_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/flow_warp.so"))
+
+
+def flow_warp(image, flow):
+ return _flow_warp_ops.flow_warp(image, flow)
+
+
+@tf.RegisterGradient("FlowWarp")
+def _flow_warp_grad(flow_warp_op, gradients):
+ return _flow_warp_ops.flow_warp_grad(flow_warp_op.inputs[0],
+ flow_warp_op.inputs[1],
+ gradients)
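A minimal sketch (assumed shapes) of warping the second frame back toward the first with the flow_warp op above, again assuming the compiled ops/build/flow_warp.so is present:

    import tensorflow as tf
    from src.flow_warp import flow_warp

    image_b = tf.placeholder(tf.float32, [1, 384, 512, 3])  # second frame
    flow    = tf.placeholder(tf.float32, [1, 384, 512, 2])  # flow from frame A to frame B
    warped  = flow_warp(image_b, flow)                       # approximation of frame A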
diff --git a/Codes/flownet2/src/flowlib.py b/Codes/flownet2/src/flowlib.py
new file mode 100644
index 0000000..36c56d4
--- /dev/null
+++ b/Codes/flownet2/src/flowlib.py
@@ -0,0 +1,554 @@
+#!/usr/bin/python
+"""
+# ==============================
+# flowlib.py
+# library for optical flow processing
+# Author: Ruoteng Li
+# Date: 6th Aug 2016
+# ==============================
+"""
+import png
+import numpy as np
+import matplotlib.colors as cl
+import matplotlib.pyplot as plt
+from PIL import Image
+import tensorflow as tf
+
+
+UNKNOWN_FLOW_THRESH = 1e7
+SMALLFLOW = 0.0
+LARGEFLOW = 1e8
+
+"""
+=============
+Flow Section
+=============
+"""
+
+
+def show_flow(filename):
+ """
+ visualize optical flow map using matplotlib
+ :param filename: optical flow file
+ :return: None
+ """
+ flow = read_flow(filename)
+ img = flow_to_image(flow)
+ plt.imshow(img)
+ plt.show()
+
+
+def visualize_flow(flow, mode='Y'):
+ """
+ this function visualize the input flow
+ :param flow: input flow in array
+ :param mode: choose which color mode to visualize the flow (Y: YCbCr, RGB: RGB color)
+ :return: None
+ """
+ if mode == 'Y':
+ # YCbCr color wheel
+ img = flow_to_image(flow)
+ plt.imshow(img)
+ plt.show()
+ elif mode == 'RGB':
+ (h, w) = flow.shape[0:2]
+ du = flow[:, :, 0]
+ dv = flow[:, :, 1]
+ valid = flow[:, :, 2]
+ max_flow = max(np.max(du), np.max(dv))
+ img = np.zeros((h, w, 3), dtype=np.float64)
+ # angle layer
+ img[:, :, 0] = np.arctan2(dv, du) / (2 * np.pi)
+ # magnitude layer, normalized to 1
+ img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow
+ # phase layer
+ img[:, :, 2] = 8 - img[:, :, 1]
+ # clip to [0,1]
+ small_idx = img[:, :, 0:3] < 0
+ large_idx = img[:, :, 0:3] > 1
+ img[small_idx] = 0
+ img[large_idx] = 1
+ # convert to rgb
+ img = cl.hsv_to_rgb(img)
+ # remove invalid point
+ img[:, :, 0] = img[:, :, 0] * valid
+ img[:, :, 1] = img[:, :, 1] * valid
+ img[:, :, 2] = img[:, :, 2] * valid
+ # show
+ plt.imshow(img)
+ plt.show()
+
+ return None
+
+
+def read_flow(filename):
+ """
+ read optical flow from Middlebury .flo file
+ :param filename: name of the flow file
+ :return: optical flow data in matrix
+ """
+ f = open(filename, 'rb')
+ magic = np.fromfile(f, np.float32, count=1)
+ data2d = None
+
+ if 202021.25 != magic:
+ print('Magic number incorrect. Invalid .flo file')
+ else:
+ w = np.fromfile(f, np.int32, count=1)
+ h = np.fromfile(f, np.int32, count=1)
+ print("Reading %d x %d flo file" % (h, w))
+ data2d = np.fromfile(f, np.float32, count=2 * w * h)
+ # reshape data into 3D array (columns, rows, channels)
+ data2d = np.resize(data2d, (h[0], w[0], 2))
+ f.close()
+ return data2d
+
+
+def read_flow_png(flow_file):
+ """
+ Read optical flow from KITTI .png file
+ :param flow_file: name of the flow file
+ :return: optical flow data in matrix
+ """
+ flow_object = png.Reader(filename=flow_file)
+ flow_direct = flow_object.asDirect()
+ flow_data = list(flow_direct[2])
+ (w, h) = flow_direct[3]['size']
+ flow = np.zeros((h, w, 3), dtype=np.float64)
+ for i in range(len(flow_data)):
+ flow[i, :, 0] = flow_data[i][0::3]
+ flow[i, :, 1] = flow_data[i][1::3]
+ flow[i, :, 2] = flow_data[i][2::3]
+
+ invalid_idx = (flow[:, :, 2] == 0)
+ flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
+ flow[invalid_idx, 0] = 0
+ flow[invalid_idx, 1] = 0
+ return flow
+
+
+def write_flow(flow, filename):
+ """
+ write optical flow in Middlebury .flo format
+ :param flow: optical flow map
+ :param filename: optical flow file path to be saved
+ :return: None
+ """
+ f = open(filename, 'wb')
+ magic = np.array([202021.25], dtype=np.float32)
+ (height, width) = flow.shape[0:2]
+ w = np.array([width], dtype=np.int32)
+ h = np.array([height], dtype=np.int32)
+ magic.tofile(f)
+ w.tofile(f)
+ h.tofile(f)
+ flow.tofile(f)
+ f.close()
+
+
+def segment_flow(flow):
+ h = flow.shape[0]
+ w = flow.shape[1]
+ u = flow[:, :, 0]
+ v = flow[:, :, 1]
+
+ idx = ((abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW))
+ idx2 = (abs(u) == SMALLFLOW)
+ class0 = (v == 0) & (u == 0)
+ u[idx2] = 0.00001
+ tan_value = v / u
+
+ class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0)
+ class2 = (tan_value >= 1) & (u >= 0) & (v >= 0)
+ class3 = (tan_value < -1) & (u <= 0) & (v >= 0)
+ class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0)
+ class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0)
+ class7 = (tan_value < -1) & (u >= 0) & (v <= 0)
+ class6 = (tan_value >= 1) & (u <= 0) & (v <= 0)
+ class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0)
+
+ seg = np.zeros((h, w))
+
+ seg[class1] = 1
+ seg[class2] = 2
+ seg[class3] = 3
+ seg[class4] = 4
+ seg[class5] = 5
+ seg[class6] = 6
+ seg[class7] = 7
+ seg[class8] = 8
+ seg[class0] = 0
+ seg[idx] = 0
+
+ return seg
+
+
+def flow_error(tu, tv, u, v):
+ """
+ Calculate average end point error
+ :param tu: ground-truth horizontal flow map
+ :param tv: ground-truth vertical flow map
+ :param u: estimated horizontal flow map
+ :param v: estimated vertical flow map
+ :return: End point error of the estimated flow
+ """
+ smallflow = 0.0
+ '''
+ stu = tu[bord+1:end-bord,bord+1:end-bord]
+ stv = tv[bord+1:end-bord,bord+1:end-bord]
+ su = u[bord+1:end-bord,bord+1:end-bord]
+ sv = v[bord+1:end-bord,bord+1:end-bord]
+ '''
+ stu = tu[:]
+ stv = tv[:]
+ su = u[:]
+ sv = v[:]
+
+ idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH)
+ stu[idxUnknow] = 0
+ stv[idxUnknow] = 0
+ su[idxUnknow] = 0
+ sv[idxUnknow] = 0
+
+ ind2 = [(np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)]
+ index_su = su[ind2]
+ index_sv = sv[ind2]
+ an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1)
+ un = index_su * an
+ vn = index_sv * an
+
+ index_stu = stu[ind2]
+ index_stv = stv[ind2]
+ tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1)
+ tun = index_stu * tn
+ tvn = index_stv * tn
+
+ '''
+ angle = un * tun + vn * tvn + (an * tn)
+ index = [angle == 1.0]
+ angle[index] = 0.999
+ ang = np.arccos(angle)
+ mang = np.mean(ang)
+ mang = mang * 180 / np.pi
+ '''
+
+ epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)
+ epe = epe[ind2]
+ mepe = np.mean(epe)
+ return mepe
+
+
+def flow_to_image(flow):
+ """
+ Convert flow into middlebury color code image
+ :param flow: optical flow map
+ :return: optical flow image in middlebury color
+ """
+ u = flow[:, :, 0]
+ v = flow[:, :, 1]
+
+ maxu = -999.
+ maxv = -999.
+ minu = 999.
+ minv = 999.
+
+ idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
+ u[idxUnknow] = 0
+ v[idxUnknow] = 0
+
+ maxu = max(maxu, np.max(u))
+ minu = min(minu, np.min(u))
+
+ maxv = max(maxv, np.max(v))
+ minv = min(minv, np.min(v))
+
+ rad = np.sqrt(u ** 2 + v ** 2)
+ maxrad = max(-1, np.max(rad))
+
+ # print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu,maxu, minv, maxv))
+
+ u = u/(maxrad + np.finfo(float).eps)
+ v = v/(maxrad + np.finfo(float).eps)
+
+ img = compute_color(u, v)
+
+ idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
+ img[idx] = 0
+
+ return np.uint8(img)
+
+
+def tf_flow_to_image(flow):
+ """
+ Convert flow into middlebury color code image
+ :param flow: optical flow map
+ :return: optical flow image in middlebury color
+ """
+ u = flow[:, :, :, 0]
+ v = flow[:, :, :, 1]
+
+ maxu = tf.constant(-999.)
+ maxv = tf.constant(-999.)
+ minu = tf.constant(999.)
+ minv = tf.constant(999.)
+
+ zeros = tf.zeros_like(u, dtype=tf.float32)
+ u = tf.where(tf.greater(tf.abs(u), UNKNOWN_FLOW_THRESH), zeros, u)
+ v = tf.where(tf.greater(tf.abs(v), UNKNOWN_FLOW_THRESH), zeros, v)
+
+ rad = tf.sqrt(u ** 2 + v ** 2)
+ maxrad = tf.maximum(tf.constant(-1.0), tf.reduce_max(rad))
+
+ # print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu, maxu, minv, maxv))
+
+ u = u / (maxrad + np.finfo(float).eps)
+ v = v / (maxrad + np.finfo(float).eps)
+
+ img = compute_color(u, v)
+
+ # idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
+ # img[idx] = 0
+
+ return np.uint8(img)
+
+
+def evaluate_flow_file(gt, pred):
+ """
+ evaluate the estimated optical flow end point error according to ground truth provided
+ :param gt: ground truth file path
+ :param pred: estimated optical flow file path
+ :return: end point error, float32
+ """
+ # Read flow files and calculate the errors
+ gt_flow = read_flow(gt) # ground truth flow
+ eva_flow = read_flow(pred) # predicted flow
+ # Calculate errors
+ average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1])
+ return average_pe
+
+
+def evaluate_flow(gt_flow, pred_flow):
+ """
+ gt: ground-truth flow
+ pred: estimated flow
+ """
+ average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], pred_flow[:, :, 0], pred_flow[:, :, 1])
+ return average_pe
+
+
+"""
+==============
+Disparity Section
+==============
+"""
+
+
+def read_disp_png(file_name):
+ """
+ Read optical flow from KITTI .png file
+ :param file_name: name of the flow file
+ :return: optical flow data in matrix
+ """
+ image_object = png.Reader(filename=file_name)
+ image_direct = image_object.asDirect()
+ image_data = list(image_direct[2])
+ (w, h) = image_direct[3]['size']
+ channel = len(image_data[0]) // w
+ flow = np.zeros((h, w, channel), dtype=np.uint16)
+ for i in range(len(image_data)):
+ for j in range(channel):
+ flow[i, :, j] = image_data[i][j::channel]
+ return flow[:, :, 0] / 256.0
+
+
+def disp_to_flowfile(disp, filename):
+ """
+ Read KITTI disparity file in png format
+ :param disp: disparity matrix
+ :param filename: the flow file name to save
+ :return: None
+ """
+ f = open(filename, 'wb')
+ magic = np.array([202021.25], dtype=np.float32)
+ (height, width) = disp.shape[0:2]
+ w = np.array([width], dtype=np.int32)
+ h = np.array([height], dtype=np.int32)
+ empty_map = np.zeros((height, width), dtype=np.float32)
+ data = np.dstack((disp, empty_map))
+ magic.tofile(f)
+ w.tofile(f)
+ h.tofile(f)
+ data.tofile(f)
+ f.close()
+
+
+"""
+==============
+Image Section
+==============
+"""
+
+
+def read_image(filename):
+ """
+ Read normal image of any format
+ :param filename: name of the image file
+ :return: image data in matrix uint8 type
+ """
+ img = Image.open(filename)
+ im = np.array(img)
+ return im
+
+
+def warp_image(im, flow):
+ """
+ Use optical flow to warp image to the next
+ :param im: image to warp
+ :param flow: optical flow
+ :return: warped image
+ """
+ from scipy import interpolate
+ image_height = im.shape[0]
+ image_width = im.shape[1]
+ flow_height = flow.shape[0]
+ flow_width = flow.shape[1]
+ n = image_height * image_width
+ (iy, ix) = np.mgrid[0:image_height, 0:image_width]
+ (fy, fx) = np.mgrid[0:flow_height, 0:flow_width]
+ fx = fx.astype(np.float64) + flow[:, :, 0]
+ fy = fy.astype(np.float64) + flow[:, :, 1]
+ mask = np.logical_or(fx <0 , fx > flow_width)
+ mask = np.logical_or(mask, fy < 0)
+ mask = np.logical_or(mask, fy > flow_height)
+ fx = np.minimum(np.maximum(fx, 0), flow_width)
+ fy = np.minimum(np.maximum(fy, 0), flow_height)
+ points = np.concatenate((ix.reshape(n,1), iy.reshape(n,1)), axis=1)
+ xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n,1)), axis=1)
+ warp = np.zeros((image_height, image_width, im.shape[2]))
+ for i in range(im.shape[2]):
+ channel = im[:, :, i]
+ plt.imshow(channel, cmap='gray')
+ values = channel.reshape(n, 1)
+ new_channel = interpolate.griddata(points, values, xi, method='cubic')
+ new_channel = np.reshape(new_channel, [flow_height, flow_width])
+ new_channel[mask] = 1
+ warp[:, :, i] = new_channel.astype(np.uint8)
+
+ return warp.astype(np.uint8)
+
+
+"""
+==============
+Others
+==============
+"""
+
+
+def scale_image(image, new_range):
+ """
+ Linearly scale the image into desired range
+ :param image: input image
+ :param new_range: the new range to be aligned
+ :return: image normalized in new range
+ """
+ min_val = np.min(image).astype(np.float32)
+ max_val = np.max(image).astype(np.float32)
+ min_val_new = np.array(min(new_range), dtype=np.float32)
+ max_val_new = np.array(max(new_range), dtype=np.float32)
+ scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new
+ return scaled_image.astype(np.uint8)
+
+
+def compute_color(u, v):
+ """
+ compute optical flow color map
+ :param u: optical flow horizontal map
+ :param v: optical flow vertical map
+ :return: optical flow in color code
+ """
+ [h, w] = u.shape
+ img = np.zeros([h, w, 3])
+ nanIdx = np.isnan(u) | np.isnan(v)
+ u[nanIdx] = 0
+ v[nanIdx] = 0
+
+ colorwheel = make_color_wheel()
+ # ncols = np.size(colorwheel, 0)
+ ncols = colorwheel.shape[0]
+
+ rad = np.sqrt(u**2+v**2)
+
+ a = np.arctan2(-v, -u) / np.pi
+
+ fk = (a+1) / 2 * (ncols - 1) + 1
+
+ k0 = np.floor(fk).astype(int)
+
+ k1 = k0 + 1
+ k1[k1 == ncols+1] = 1
+ f = fk - k0
+
+ for i in range(0, np.size(colorwheel, 1)):
+ tmp = colorwheel[:, i]
+ col0 = tmp[k0-1] / 255
+ col1 = tmp[k1-1] / 255
+ col = (1-f) * col0 + f * col1
+
+ idx = rad <= 1
+ col[idx] = 1-rad[idx]*(1-col[idx])
+ notidx = np.logical_not(idx)
+
+ col[notidx] *= 0.75
+ img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))
+
+ return img
+
+
+def make_color_wheel():
+ """
+ Generate color wheel according Middlebury color code
+ :return: Color wheel
+ """
+ RY = 15
+ YG = 6
+ GC = 4
+ CB = 11
+ BM = 13
+ MR = 6
+
+ ncols = RY + YG + GC + CB + BM + MR
+
+ colorwheel = np.zeros([ncols, 3])
+
+ col = 0
+
+ # RY
+ colorwheel[0:RY, 0] = 255
+ colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))
+ col += RY
+
+ # YG
+ colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))
+ colorwheel[col:col+YG, 1] = 255
+ col += YG
+
+ # GC
+ colorwheel[col:col+GC, 1] = 255
+ colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))
+ col += GC
+
+ # CB
+ colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))
+ colorwheel[col:col+CB, 2] = 255
+ col += CB
+
+ # BM
+ colorwheel[col:col+BM, 2] = 255
+ colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))
+ col += BM
+
+ # MR
+ colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
+ colorwheel[col:col+MR, 0] = 255
+
+ return colorwheel
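A short usage sketch for the library above; the file path is a placeholder:

    from src import flowlib

    flow = flowlib.read_flow('frame_0001.flo')   # (H, W, 2) float32 flow field
    img  = flowlib.flow_to_image(flow)           # (H, W, 3) uint8 Middlebury color coding
    epe  = flowlib.evaluate_flow(flow, flow)     # end-point error of a flow against itself: 0.0
    flowlib.write_flow(flow, 'copy.flo')         # round-trip back to .flo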
diff --git a/Codes/flownet2/src/flownet2/__init__.py b/Codes/flownet2/src/flownet2/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/__init__.py
diff --git a/Codes/flownet2/src/flownet2/flownet2.py b/Codes/flownet2/src/flownet2/flownet2.py
new file mode 100644
index 0000000..d44ed10
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/flownet2.py
@@ -0,0 +1,118 @@
+from ..net import Net, Mode
+from ..flownet_css.flownet_css import FlowNetCSS
+from ..flownet_sd.flownet_sd import FlowNetSD
+from ..flow_warp import flow_warp
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+from ..downsample import downsample
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNet2(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.net_css = FlowNetCSS(mode, debug)
+ self.net_sd = FlowNetSD(mode, debug)
+ super(FlowNet2, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ with tf.variable_scope('FlowNet2'):
+ # Forward pass through FlowNetCSS and FlowNetSD (trainable=True here; pass trainable=False to freeze them)
+ net_css_predictions = self.net_css.model(inputs, training_schedule, trainable=True)
+ net_sd_predictions = self.net_sd.model(inputs, training_schedule, trainable=True)
+
+ def ChannelNorm(tensor):
+ sq = tf.square(tensor)
+ r_sum = tf.reduce_sum(sq, keep_dims=True, axis=3)
+ return tf.sqrt(r_sum)
+
+ sd_flow_norm = ChannelNorm(net_sd_predictions['flow'])
+ css_flow_norm = ChannelNorm(net_css_predictions['flow'])
+
+ flow_warp_sd = flow_warp(inputs['input_b'], net_sd_predictions['flow'])
+ img_diff_sd = inputs['input_a'] - flow_warp_sd
+ img_diff_sd_norm = ChannelNorm(img_diff_sd)
+
+ flow_warp_css = flow_warp(inputs['input_b'], net_css_predictions['flow'])
+ img_diff_css = inputs['input_a'] - flow_warp_css
+ img_diff_css_norm = ChannelNorm(img_diff_css)
+
+ input_to_fusion = tf.concat([inputs['input_a'],
+ net_sd_predictions['flow'],
+ net_css_predictions['flow'],
+ sd_flow_norm,
+ css_flow_norm,
+ img_diff_sd_norm,
+ img_diff_css_norm], axis=3)
+
+ # Fusion Network
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ fuse_conv0 = slim.conv2d(pad(input_to_fusion), 64, 3, scope='fuse_conv0')
+ fuse_conv1 = slim.conv2d(pad(fuse_conv0), 64, 3, stride=2, scope='fuse_conv1')
+ fuse_conv1_1 = slim.conv2d(pad(fuse_conv1), 128, 3, scope='fuse_conv1_1')
+ fuse_conv2 = slim.conv2d(pad(fuse_conv1_1), 128, 3,
+ stride=2, scope='fuse_conv2')
+ fuse_conv2_1 = slim.conv2d(pad(fuse_conv2), 128, 3, scope='fuse_conv2_1')
+
+ predict_flow2 = slim.conv2d(pad(fuse_conv2_1), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ fuse_deconv1 = antipad(slim.conv2d_transpose(fuse_conv2_1, 32, 4,
+ stride=2,
+ scope='fuse_deconv1'))
+ fuse_upsample_flow2to1 = antipad(slim.conv2d_transpose(predict_flow2, 2, 4,
+ stride=2,
+ scope='fuse_upsample_flow2to1',
+ activation_fn=None))
+ concat1 = tf.concat([fuse_conv1_1, fuse_deconv1,
+ fuse_upsample_flow2to1], axis=3)
+ fuse_interconv1 = slim.conv2d(pad(concat1), 32, 3,
+ activation_fn=None, scope='fuse_interconv1')
+
+ predict_flow1 = slim.conv2d(pad(fuse_interconv1), 2, 3,
+ scope='predict_flow1',
+ activation_fn=None)
+ fuse_deconv0 = antipad(slim.conv2d_transpose(concat1, 16, 4,
+ stride=2,
+ scope='fuse_deconv0'))
+ fuse_upsample_flow1to0 = antipad(slim.conv2d_transpose(predict_flow1, 2, 4,
+ stride=2,
+ scope='fuse_upsample_flow1to0',
+ activation_fn=None))
+ concat0 = tf.concat([fuse_conv0, fuse_deconv0, fuse_upsample_flow1to0], axis=3)
+ fuse_interconv0 = slim.conv2d(pad(concat0), 16, 3,
+ activation_fn=None, scope='fuse_interconv0')
+
+ predict_flow0 = slim.conv2d(pad(fuse_interconv0), 2,
+ 3, activation_fn=None, scope='predict_flow0')
+
+ flow = tf.image.resize_bilinear(
+ predict_flow0, tf.stack([height, width]), align_corners=True)
+ print(predict_flow0)
+ print(flow)
+ return {
+ 'predict_flow0': predict_flow0,
+ 'flow': flow,
+ }
+
+ def loss(self, flow, predictions):
+ # L2 loss between predict_flow0, true flow (weighted w/ 0.005)
+ predict_flow0 = predictions['predict_flow0']
+ size = [predict_flow0.shape[1], predict_flow0.shape[2]]
+ downsampled_flow0 = downsample(flow, size)
+ loss = average_endpoint_error(downsampled_flow0, predict_flow0)
+ tf.losses.add_loss(loss)
+
+ # Return the 'total' loss: loss fns + regularization terms defined in the model
+ return tf.losses.get_total_loss()
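The loss above delegates to average_endpoint_error from utils.py (listed in the diffstat but outside this excerpt). As a hypothetical reference only, the end-point error it is expected to compute is the per-pixel Euclidean distance between predicted and ground-truth flow vectors, averaged over all pixels and the batch:

    import tensorflow as tf

    def average_endpoint_error_sketch(gt_flow, pred_flow):
        # per-pixel EPE = sqrt((u - u_gt)^2 + (v - v_gt)^2), then the mean over
        # all pixels and the batch; a sketch, not the utils.py implementation
        epe = tf.sqrt(tf.reduce_sum(tf.square(pred_flow - gt_flow), axis=3))
        return tf.reduce_mean(epe)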
diff --git a/Codes/flownet2/src/flownet2/test.py b/Codes/flownet2/src/flownet2/test.py
new file mode 100644
index 0000000..3177614
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet2 import FlowNet2
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNet2(mode=Mode.TEST)
+
+ # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNet2/flownet-2.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet2/train.py b/Codes/flownet2/src/flownet2/train.py
new file mode 100644
index 0000000..40c028d
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/train.py
@@ -0,0 +1,24 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet2 import FlowNet2
+
+# Create a new network
+net = FlowNet2()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_2',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow,
+ # Load trained weights for CSS and SD parts of network
+ checkpoints={
+ './checkpoints/FlowNetCSS-ft-sd/flownet-CSS-ft-sd.ckpt-0': ('FlowNet2/FlowNetCSS', 'FlowNet2'),
+ './checkpoints/FlowNetSD/flownet-SD.ckpt-0': ('FlowNet2/FlowNetSD', 'FlowNet2')
+ }
+)
diff --git a/Codes/flownet2/src/flownet_c/__init__.py b/Codes/flownet2/src/flownet_c/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/__init__.py
diff --git a/Codes/flownet2/src/flownet_c/flownet_c.py b/Codes/flownet2/src/flownet_c/flownet_c.py
new file mode 100644
index 0000000..d333ee2
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/flownet_c.py
@@ -0,0 +1,167 @@
+from ..net import Net, Mode
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+from ..correlation import correlation
+from ..downsample import downsample
+import math
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNetC(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ super(FlowNetC, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ with tf.variable_scope('FlowNetC'):
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ with slim.arg_scope([slim.conv2d], stride=2):
+ conv_a_1 = slim.conv2d(pad(inputs['input_a'], 3), 64, 7, scope='conv1')
+ conv_a_2 = slim.conv2d(pad(conv_a_1, 2), 128, 5, scope='conv2')
+ conv_a_3 = slim.conv2d(pad(conv_a_2, 2), 256, 5, scope='conv3')
+
+ conv_b_1 = slim.conv2d(pad(inputs['input_b'], 3),
+ 64, 7, scope='conv1', reuse=True)
+ conv_b_2 = slim.conv2d(pad(conv_b_1, 2), 128, 5, scope='conv2', reuse=True)
+ conv_b_3 = slim.conv2d(pad(conv_b_2, 2), 256, 5, scope='conv3', reuse=True)
+
+ # Compute cross correlation with leaky relu activation
+ cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20)
+ cc_relu = LeakyReLU(cc)
+
+ # Combine cross correlation results with convolution of feature map A
+ netA_conv = slim.conv2d(conv_a_3, 32, 1, scope='conv_redir')
+ # Concatenate along the channels axis
+ net = tf.concat([netA_conv, cc_relu], axis=3)
+
+ conv3_1 = slim.conv2d(pad(net), 256, 3, scope='conv3_1')
+ with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
+ conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
+ conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
+ conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
+ conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
+ conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
+ conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
+
+ """ START: Refinement Network """
+ with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
+ predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
+ scope='predict_flow6',
+ activation_fn=None)
+
+ deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
+ stride=2,
+ scope='deconv5'))
+ upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
+ stride=2,
+ scope='upsample_flow6to5',
+ activation_fn=None))
+ concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
+
+ predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
+ scope='predict_flow5',
+ activation_fn=None)
+ deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
+ stride=2,
+ scope='deconv4'))
+ upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
+ stride=2,
+ scope='upsample_flow5to4',
+ activation_fn=None))
+ concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
+
+ predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
+ scope='predict_flow4',
+ activation_fn=None)
+ deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
+ stride=2,
+ scope='deconv3'))
+ upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
+ stride=2,
+ scope='upsample_flow4to3',
+ activation_fn=None))
+ concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
+
+ predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
+ scope='predict_flow3',
+ activation_fn=None)
+ deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
+ stride=2,
+ scope='deconv2'))
+ upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
+ stride=2,
+ scope='upsample_flow3to2',
+ activation_fn=None))
+ concat2 = tf.concat([conv_a_2, deconv2, upsample_flow3to2], axis=3)
+
+ predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ """ END: Refinement Network """
+
+ flow = predict_flow2 * 20.0
+ # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
+ flow = tf.image.resize_bilinear(flow,
+ tf.stack([height, width]),
+ align_corners=True)
+
+ return {
+ 'predict_flow6': predict_flow6,
+ 'predict_flow5': predict_flow5,
+ 'predict_flow4': predict_flow4,
+ 'predict_flow3': predict_flow3,
+ 'predict_flow2': predict_flow2,
+ 'flow': flow,
+ }
+
+ def loss(self, flow, predictions):
+ flow = flow * 0.05
+
+ losses = []
+ INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
+
+ # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
+ predict_flow6 = predictions['predict_flow6']
+ size = [predict_flow6.shape[1], predict_flow6.shape[2]]
+ downsampled_flow6 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
+
+ # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
+ predict_flow5 = predictions['predict_flow5']
+ size = [predict_flow5.shape[1], predict_flow5.shape[2]]
+ downsampled_flow5 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
+
+ # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
+ predict_flow4 = predictions['predict_flow4']
+ size = [predict_flow4.shape[1], predict_flow4.shape[2]]
+ downsampled_flow4 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
+
+ # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
+ predict_flow3 = predictions['predict_flow3']
+ size = [predict_flow3.shape[1], predict_flow3.shape[2]]
+ downsampled_flow3 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
+
+ # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
+ predict_flow2 = predictions['predict_flow2']
+ size = [predict_flow2.shape[1], predict_flow2.shape[2]]
+ downsampled_flow2 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
+
+ loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
+
+ # Return the 'total' loss: loss fns + regularization terms defined in the model
+ return tf.losses.get_total_loss()
diff --git a/Codes/flownet2/src/flownet_c/test.py b/Codes/flownet2/src/flownet_c/test.py
new file mode 100644
index 0000000..692f22d
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_c import FlowNetC
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetC(mode=Mode.TEST)
+
+ # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetC/flownet-C.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_c/train.py b/Codes/flownet2/src/flownet_c/train.py
new file mode 100644
index 0000000..9296ac7
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/train.py
@@ -0,0 +1,19 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_c import FlowNetC
+
+# Create a new network
+net = FlowNetC()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_c',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow
+)
diff --git a/Codes/flownet2/src/flownet_cs/__init__.py b/Codes/flownet2/src/flownet_cs/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/__init__.py
diff --git a/Codes/flownet2/src/flownet_cs/flownet_cs.py b/Codes/flownet2/src/flownet_cs/flownet_cs.py
new file mode 100644
index 0000000..aeaea47
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/flownet_cs.py
@@ -0,0 +1,41 @@
+from ..net import Net, Mode
+from ..flownet_c.flownet_c import FlowNetC
+from ..flownet_s.flownet_s import FlowNetS
+from ..flow_warp import flow_warp
+import tensorflow as tf
+
+
+class FlowNetCS(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.net_c = FlowNetC(mode, debug)
+ self.net_s = FlowNetS(mode, debug)
+ super(FlowNetCS, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ with tf.variable_scope('FlowNetCS'):
+ # Forward pass through FlowNetC (trainable=True here; pass trainable=False to freeze it)
+ net_c_predictions = self.net_c.model(inputs, training_schedule, trainable=True)
+
+ # Perform flow warping (to move image B closer to image A based on flow prediction)
+ warped = flow_warp(inputs['input_b'], net_c_predictions['flow'])
+
+ # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels)
+ brightness_error = inputs['input_a'] - warped
+ brightness_error = tf.square(brightness_error)
+ brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3)
+ brightness_error = tf.sqrt(brightness_error)
+
+ # Gather all inputs to FlowNetS
+ inputs_to_s = {
+ 'input_a': inputs['input_a'],
+ 'input_b': inputs['input_b'],
+ 'warped': warped,
+ 'flow': net_c_predictions['flow'] * 0.05,
+ 'brightness_error': brightness_error,
+ }
+
+ return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable)
+
+ def loss(self, flow, predictions):
+ return self.net_s.loss(flow, predictions)
diff --git a/Codes/flownet2/src/flownet_cs/test.py b/Codes/flownet2/src/flownet_cs/test.py
new file mode 100644
index 0000000..ae00ff4
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_cs import FlowNetCS
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetCS(mode=Mode.TEST)
+
+ # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetCS/flownet-CS.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_cs/train.py b/Codes/flownet2/src/flownet_cs/train.py
new file mode 100644
index 0000000..9376132
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/train.py
@@ -0,0 +1,21 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_cs import FlowNetCS
+
+# Create a new network
+net = FlowNetCS()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_cs',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow,
+ # Load trained weights for C part of network
+ checkpoints={'./checkpoints/FlowNetC/flownet-C.ckpt-0': ('FlowNetCS/FlowNetC', 'FlowNetCS')}
+)
diff --git a/Codes/flownet2/src/flownet_css/__init__.py b/Codes/flownet2/src/flownet_css/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/__init__.py
diff --git a/Codes/flownet2/src/flownet_css/flownet_css.py b/Codes/flownet2/src/flownet_css/flownet_css.py
new file mode 100644
index 0000000..93d9db2
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/flownet_css.py
@@ -0,0 +1,41 @@
+from ..net import Net, Mode
+from ..flownet_cs.flownet_cs import FlowNetCS
+from ..flownet_s.flownet_s import FlowNetS
+from ..flow_warp import flow_warp
+import tensorflow as tf
+
+
+class FlowNetCSS(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.net_cs = FlowNetCS(mode, debug)
+ self.net_s = FlowNetS(mode, debug)
+ super(FlowNetCSS, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ with tf.variable_scope('FlowNetCSS'):
+ # Forward pass through FlowNetCS (trainable=True here; pass trainable=False to freeze it)
+ net_cs_predictions = self.net_cs.model(inputs, training_schedule, trainable=True)
+
+ # Perform flow warping (to move image B closer to image A based on flow prediction)
+ warped = flow_warp(inputs['input_b'], net_cs_predictions['flow'])
+
+ # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels)
+ brightness_error = inputs['input_a'] - warped
+ brightness_error = tf.square(brightness_error)
+ brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3)
+ brightness_error = tf.sqrt(brightness_error)
+
+ # Gather all inputs to FlowNetS
+ inputs_to_s = {
+ 'input_a': inputs['input_a'],
+ 'input_b': inputs['input_b'],
+ 'warped': warped,
+ 'flow': net_cs_predictions['flow'] * 0.05,
+ 'brightness_error': brightness_error,
+ }
+
+ return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable)
+
+ def loss(self, flow, predictions):
+ return self.net_s.loss(flow, predictions)
diff --git a/Codes/flownet2/src/flownet_css/test.py b/Codes/flownet2/src/flownet_css/test.py
new file mode 100644
index 0000000..9d1249e
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_css import FlowNetCSS
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetCSS(mode=Mode.TEST)
+
+ # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetCSS/flownet-CSS.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_css/train.py b/Codes/flownet2/src/flownet_css/train.py
new file mode 100644
index 0000000..2964f3e
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/train.py
@@ -0,0 +1,22 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_css import FlowNetCSS
+
+# Create a new network
+net = FlowNetCSS()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_css',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow,
+ # Load trained weights for CS part of network
+ checkpoints={
+ './checkpoints/FlowNetCS/flownet-CS.ckpt-0': ('FlowNetCSS/FlowNetCS', 'FlowNetCSS')}
+)
diff --git a/Codes/flownet2/src/flownet_s/__init__.py b/Codes/flownet2/src/flownet_s/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/__init__.py
diff --git a/Codes/flownet2/src/flownet_s/flownet_s.py b/Codes/flownet2/src/flownet_s/flownet_s.py
new file mode 100644
index 0000000..f6704b1
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/flownet_s.py
@@ -0,0 +1,161 @@
+from ..net import Net, Mode
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+from ..downsample import downsample
+import math
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNetS(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ super(FlowNetS, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ stacked = False
+ with tf.variable_scope('FlowNetS'):
+ if 'warped' in inputs and 'flow' in inputs and 'brightness_error' in inputs:
+ stacked = True
+ concat_inputs = tf.concat([inputs['input_a'],
+ inputs['input_b'],
+ inputs['warped'],
+ inputs['flow'],
+ inputs['brightness_error']], axis=3)
+ else:
+ concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3)
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ with slim.arg_scope([slim.conv2d], stride=2):
+ conv_1 = slim.conv2d(pad(concat_inputs, 3), 64, 7, scope='conv1')
+ conv_2 = slim.conv2d(pad(conv_1, 2), 128, 5, scope='conv2')
+ conv_3 = slim.conv2d(pad(conv_2, 2), 256, 5, scope='conv3')
+
+ conv3_1 = slim.conv2d(pad(conv_3), 256, 3, scope='conv3_1')
+ with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
+ conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
+ conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
+ conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
+ conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
+ conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
+ conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
+
+ """ START: Refinement Network """
+ with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
+ predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
+ scope='predict_flow6',
+ activation_fn=None)
+ deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
+ stride=2,
+ scope='deconv5'))
+ upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
+ stride=2,
+ scope='upsample_flow6to5',
+ activation_fn=None))
+ concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
+
+ predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
+ scope='predict_flow5',
+ activation_fn=None)
+ deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
+ stride=2,
+ scope='deconv4'))
+ upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
+ stride=2,
+ scope='upsample_flow5to4',
+ activation_fn=None))
+ concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
+
+ predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
+ scope='predict_flow4',
+ activation_fn=None)
+ deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
+ stride=2,
+ scope='deconv3'))
+ upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
+ stride=2,
+ scope='upsample_flow4to3',
+ activation_fn=None))
+ concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
+
+ predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
+ scope='predict_flow3',
+ activation_fn=None)
+ deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
+ stride=2,
+ scope='deconv2'))
+ upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
+ stride=2,
+ scope='upsample_flow3to2',
+ activation_fn=None))
+ concat2 = tf.concat([conv_2, deconv2, upsample_flow3to2], axis=3)
+
+ predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ """ END: Refinement Network """
+
+ flow = predict_flow2 * 20.0
+ # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
+ flow = tf.image.resize_bilinear(flow,
+ tf.stack([height, width]),
+ align_corners=True)
+
+ return {
+ 'predict_flow6': predict_flow6,
+ 'predict_flow5': predict_flow5,
+ 'predict_flow4': predict_flow4,
+ 'predict_flow3': predict_flow3,
+ 'predict_flow2': predict_flow2,
+ 'flow': flow,
+ }
+
+ def loss(self, flow, predictions):
+ flow = flow * 0.05
+
+ losses = []
+ INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
+
+ # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
+ predict_flow6 = predictions['predict_flow6']
+ size = [predict_flow6.shape[1], predict_flow6.shape[2]]
+ downsampled_flow6 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
+
+ # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
+ predict_flow5 = predictions['predict_flow5']
+ size = [predict_flow5.shape[1], predict_flow5.shape[2]]
+ downsampled_flow5 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
+
+ # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
+ predict_flow4 = predictions['predict_flow4']
+ size = [predict_flow4.shape[1], predict_flow4.shape[2]]
+ downsampled_flow4 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
+
+ # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
+ predict_flow3 = predictions['predict_flow3']
+ size = [predict_flow3.shape[1], predict_flow3.shape[2]]
+ downsampled_flow3 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
+
+ # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
+ predict_flow2 = predictions['predict_flow2']
+ size = [predict_flow2.shape[1], predict_flow2.shape[2]]
+ downsampled_flow2 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
+
+ loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
+
+ # Return the 'total' loss: loss fns + regularization terms defined in the model
+ return tf.losses.get_total_loss()
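Note: average_endpoint_error is imported from ..utils and is not part of this diff; it is assumed to average, over all pixels, the Euclidean distance between predicted and (downsampled) ground-truth flow vectors. A rough NumPy sketch under that assumption:

    import numpy as np

    def average_endpoint_error(gt_flow, pred_flow):
        # gt_flow, pred_flow: [batch, height, width, 2] flow fields
        epe = np.sqrt(np.sum(np.square(pred_flow - gt_flow), axis=3))
        return np.mean(epe)

The five per-scale losses are combined by tf.losses.compute_weighted_loss with weights 0.32, 0.08, 0.02, 0.01 and 0.005, and get_total_loss() then adds the L2 regularization terms registered by the conv layers.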
diff --git a/Codes/flownet2/src/flownet_s/test.py b/Codes/flownet2/src/flownet_s/test.py
new file mode 100644
index 0000000..ae1b2f3
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_s import FlowNetS
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetS(mode=Mode.TEST)
+
+ # Run the network on the two input images
+ net.test(
+ checkpoint='./checkpoints/FlowNetS/flownet-S.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_s/train.py b/Codes/flownet2/src/flownet_s/train.py
new file mode 100644
index 0000000..13a792a
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/train.py
@@ -0,0 +1,19 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_s import FlowNetS
+
+# Create a new network
+net = FlowNetS()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_s_sample',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow
+)
diff --git a/Codes/flownet2/src/flownet_sd/__init__.py b/Codes/flownet2/src/flownet_sd/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/__init__.py
diff --git a/Codes/flownet2/src/flownet_sd/flownet_sd.py b/Codes/flownet2/src/flownet_sd/flownet_sd.py
new file mode 100644
index 0000000..2f5c9e4
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/flownet_sd.py
@@ -0,0 +1,160 @@
+from ..net import Net, Mode
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+# from ..downsample import downsample
+import math
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNetSD(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ super(FlowNetSD, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True, reuse=None):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ with tf.variable_scope('FlowNetSD', reuse=reuse):
+ concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3)
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ conv0 = slim.conv2d(pad(concat_inputs), 64, 3, scope='conv0')
+ conv1 = slim.conv2d(pad(conv0), 64, 3, stride=2, scope='conv1')
+ conv1_1 = slim.conv2d(pad(conv1), 128, 3, scope='conv1_1')
+ conv2 = slim.conv2d(pad(conv1_1), 128, 3, stride=2, scope='conv2')
+ conv2_1 = slim.conv2d(pad(conv2), 128, 3, scope='conv2_1')
+ conv3 = slim.conv2d(pad(conv2_1), 256, 3, stride=2, scope='conv3')
+ conv3_1 = slim.conv2d(pad(conv3), 256, 3, scope='conv3_1')
+ conv4 = slim.conv2d(pad(conv3_1), 512, 3, stride=2, scope='conv4')
+ conv4_1 = slim.conv2d(pad(conv4), 512, 3, scope='conv4_1')
+ conv5 = slim.conv2d(pad(conv4_1), 512, 3, stride=2, scope='conv5')
+ conv5_1 = slim.conv2d(pad(conv5), 512, 3, scope='conv5_1')
+ conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
+ conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
+
+ """ START: Refinement Network """
+ with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
+ predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
+ scope='predict_flow6',
+ activation_fn=None)
+ deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
+ stride=2,
+ scope='deconv5'))
+ upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
+ stride=2,
+ scope='upsample_flow6to5',
+ activation_fn=None))
+ concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
+ interconv5 = slim.conv2d(pad(concat5), 512, 3,
+ activation_fn=None, scope='interconv5')
+
+ predict_flow5 = slim.conv2d(pad(interconv5), 2, 3,
+ scope='predict_flow5',
+ activation_fn=None)
+ deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
+ stride=2,
+ scope='deconv4'))
+ upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
+ stride=2,
+ scope='upsample_flow5to4',
+ activation_fn=None))
+ concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
+ interconv4 = slim.conv2d(pad(concat4), 256, 3,
+ activation_fn=None, scope='interconv4')
+
+ predict_flow4 = slim.conv2d(pad(interconv4), 2, 3,
+ scope='predict_flow4',
+ activation_fn=None)
+ deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
+ stride=2,
+ scope='deconv3'))
+ upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
+ stride=2,
+ scope='upsample_flow4to3',
+ activation_fn=None))
+ concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
+ interconv3 = slim.conv2d(pad(concat3), 128, 3,
+ activation_fn=None, scope='interconv3')
+
+ predict_flow3 = slim.conv2d(pad(interconv3), 2, 3,
+ scope='predict_flow3',
+ activation_fn=None)
+ deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
+ stride=2,
+ scope='deconv2'))
+ upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
+ stride=2,
+ scope='upsample_flow3to2',
+ activation_fn=None))
+ concat2 = tf.concat([conv2, deconv2, upsample_flow3to2], axis=3)
+ interconv2 = slim.conv2d(pad(concat2), 64, 3,
+ activation_fn=None, scope='interconv2')
+
+ predict_flow2 = slim.conv2d(pad(interconv2), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ """ END: Refinement Network """
+
+ flow = predict_flow2 * 0.05
+ # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
+ flow = tf.image.resize_bilinear(flow,
+ tf.stack([height, width]),
+ align_corners=True)
+
+ return {
+ 'predict_flow6': predict_flow6,
+ 'predict_flow5': predict_flow5,
+ 'predict_flow4': predict_flow4,
+ 'predict_flow3': predict_flow3,
+ 'predict_flow2': predict_flow2,
+ 'flow': flow,
+ }
+
+ # def loss(self, flow, predictions):
+ # flow = flow * 20.0
+ #
+ # losses = []
+ # INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
+ #
+ # # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
+ # predict_flow6 = predictions['predict_flow6']
+ # size = [predict_flow6.shape[1], predict_flow6.shape[2]]
+ # downsampled_flow6 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
+ #
+ # # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
+ # predict_flow5 = predictions['predict_flow5']
+ # size = [predict_flow5.shape[1], predict_flow5.shape[2]]
+ # downsampled_flow5 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
+ #
+ # # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
+ # predict_flow4 = predictions['predict_flow4']
+ # size = [predict_flow4.shape[1], predict_flow4.shape[2]]
+ # downsampled_flow4 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
+ #
+ # # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
+ # predict_flow3 = predictions['predict_flow3']
+ # size = [predict_flow3.shape[1], predict_flow3.shape[2]]
+ # downsampled_flow3 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
+ #
+ # # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
+ # predict_flow2 = predictions['predict_flow2']
+ # size = [predict_flow2.shape[1], predict_flow2.shape[2]]
+ # downsampled_flow2 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
+ #
+ # loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
+ #
+ # # Return the 'total' loss: loss fns + regularization terms defined in the model
+ # return tf.losses.get_total_loss()
diff --git a/Codes/flownet2/src/flownet_sd/test.py b/Codes/flownet2/src/flownet_sd/test.py
new file mode 100644
index 0000000..b2ac285
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_sd import FlowNetSD
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetSD(mode=Mode.TEST)
+
+ # Run the network on the two input images
+ net.test(
+ checkpoint='./checkpoints/FlowNetSD/flownet-SD.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_sd/train.py b/Codes/flownet2/src/flownet_sd/train.py
new file mode 100644
index 0000000..86c64e5
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/train.py
@@ -0,0 +1,19 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_sd import FlowNetSD
+
+# Create a new network
+net = FlowNetSD()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_sd_sample',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow
+)
diff --git a/Codes/flownet2/src/net.py b/Codes/flownet2/src/net.py
new file mode 100644
index 0000000..43b2193
--- /dev/null
+++ b/Codes/flownet2/src/net.py
@@ -0,0 +1,177 @@
+import abc
+from enum import Enum
+import os
+import tensorflow as tf
+from .flowlib import flow_to_image, write_flow
+import numpy as np
+# from scipy.misc import imread, imsave, imresize
+import cv2
+import uuid
+from .training_schedules import LONG_SCHEDULE
+slim = tf.contrib.slim
+
+os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+class Mode(Enum):
+ TRAIN = 1
+ TEST = 2
+
+
+class Net(object):
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.global_step = slim.get_or_create_global_step()
+ self.mode = mode
+ self.debug = debug
+
+ @abc.abstractmethod
+ def model(self, inputs, training_schedule, trainable=True):
+ """
+ Defines the model and returns a dict of prediction Tensors needed for calculating the loss.
+ """
+ return
+
+ @abc.abstractmethod
+ def loss(self, **kwargs):
+ """
+ Accepts prediction Tensors from the output of `model`.
+ Returns a single Tensor representing the total loss of the model.
+ """
+ return
+ """
+ python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_6.png \
+ --input_b /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_7.png \
+ --out ./
+ python -m src.flownet_sd.test --input_a 006.png --input_b 007.png --out ./
+ python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/ped1/frames/testing/01/006.jpg \
+ --input_b /home/liuwen/ssd/videogan/ped1/frames/testing/01/007.jpg \
+ --out ./
+ """
+ def test(self, checkpoint, input_a_path, input_b_path, out_path, save_image=True, save_flo=False):
+ input_a = cv2.imread(input_a_path)
+ input_b = cv2.imread(input_b_path)
+
+ input_a = cv2.resize(input_a, (512, 384))
+ input_b = cv2.resize(input_b, (512, 384))
+ print(input_a.shape, input_b.shape)
+
+ # cv2.imread already returns BGR, so no RGB -> BGR conversion is needed here
+ # input_a = input_a[..., [2, 1, 0]]
+ # input_b = input_b[..., [2, 1, 0]]
+
+ # Scale from [0, 255] -> [0.0, 1.0] if needed
+ if input_a.max() > 1.0:
+ input_a = input_a / 255.0
+ if input_b.max() > 1.0:
+ input_b = input_b / 255.0
+
+ # TODO: This is a hack, we should get rid of this
+ training_schedule = LONG_SCHEDULE
+
+ inputs = {
+ 'input_a': tf.expand_dims(tf.constant(input_a, dtype=tf.float32), 0),
+ 'input_b': tf.expand_dims(tf.constant(input_b, dtype=tf.float32), 0),
+ }
+ predictions = self.model(inputs, training_schedule)
+ pred_flow = predictions['flow']
+
+ saver = tf.train.Saver()
+
+ config = tf.ConfigProto()
+ config.gpu_options.allow_growth = True
+ with tf.Session(config=config) as sess:
+ saver.restore(sess, checkpoint)
+ pred_flow = sess.run(pred_flow)[0, :, :, :]
+
+ np.save('temporal_ped1', pred_flow)
+
+ unique_name = 'flow-' + str(uuid.uuid4())
+ if save_image:
+ flow_img = flow_to_image(pred_flow)
+ full_out_path = os.path.join(out_path, unique_name + '.png')
+ cv2.imwrite(full_out_path, flow_img)
+
+ if save_flo:
+ full_out_path = os.path.join(out_path, unique_name + '.flo')
+ write_flow(pred_flow, full_out_path)
+
+ def train(self, log_dir, training_schedule, input_a, input_b, flow, checkpoints=None):
+ tf.summary.image("image_a", input_a, max_outputs=2)
+ tf.summary.image("image_b", input_b, max_outputs=2)
+
+ self.learning_rate = tf.train.piecewise_constant(
+ self.global_step,
+ [tf.cast(v, tf.int64) for v in training_schedule['step_values']],
+ training_schedule['learning_rates'])
+
+ optimizer = tf.train.AdamOptimizer(
+ self.learning_rate,
+ training_schedule['momentum'],
+ training_schedule['momentum2'])
+
+ inputs = {
+ 'input_a': input_a,
+ 'input_b': input_b,
+ }
+ predictions = self.model(inputs, training_schedule)
+ total_loss = self.loss(flow, predictions)
+ tf.summary.scalar('loss', total_loss)
+
+ if checkpoints:
+ for (checkpoint_path, (scope, new_scope)) in checkpoints.items():
+ variables_to_restore = slim.get_variables(scope=scope)
+ renamed_variables = {
+ var.op.name.split(new_scope + '/')[1]: var
+ for var in variables_to_restore
+ }
+ restorer = tf.train.Saver(renamed_variables)
+ with tf.Session() as sess:
+ restorer.restore(sess, checkpoint_path)
+
+ # Show the generated flow in TensorBoard
+ if 'flow' in predictions:
+ pred_flow_0 = predictions['flow'][0, :, :, :]
+ pred_flow_0 = tf.py_func(flow_to_image, [pred_flow_0], tf.uint8)
+ pred_flow_1 = predictions['flow'][1, :, :, :]
+ pred_flow_1 = tf.py_func(flow_to_image, [pred_flow_1], tf.uint8)
+ pred_flow_img = tf.stack([pred_flow_0, pred_flow_1], 0)
+ tf.summary.image('pred_flow', pred_flow_img, max_outputs=2)
+
+ true_flow_0 = flow[0, :, :, :]
+ true_flow_0 = tf.py_func(flow_to_image, [true_flow_0], tf.uint8)
+ true_flow_1 = flow[1, :, :, :]
+ true_flow_1 = tf.py_func(flow_to_image, [true_flow_1], tf.uint8)
+ true_flow_img = tf.stack([true_flow_0, true_flow_1], 0)
+ tf.summary.image('true_flow', true_flow_img, max_outputs=2)
+
+ train_op = slim.learning.create_train_op(
+ total_loss,
+ optimizer,
+ summarize_gradients=True)
+
+ if self.debug:
+ with tf.Session() as sess:
+ sess.run(tf.global_variables_initializer())
+ tf.train.start_queue_runners(sess)
+ slim.learning.train_step(
+ sess,
+ train_op,
+ self.global_step,
+ {
+ 'should_trace': tf.constant(1),
+ 'should_log': tf.constant(1),
+ 'logdir': log_dir + '/debug',
+ }
+ )
+ else:
+ slim.learning.train(
+ train_op,
+ log_dir,
+ # session_config=tf.ConfigProto(allow_soft_placement=True),
+ global_step=self.global_step,
+ save_summaries_secs=60,
+ number_of_steps=training_schedule['max_iter']
+ )
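Note on the checkpoints argument of train(): each entry maps a checkpoint path to a (scope, new_scope) pair, and the variables found under scope are restored from the checkpoint after stripping the new_scope prefix from their graph names. A small illustrative example for the FlowNetCS training script (the concrete variable name is hypothetical):

    var_name = 'FlowNetCS/FlowNetC/conv1/weights'   # name in the current graph
    new_scope = 'FlowNetCS'
    ckpt_name = var_name.split(new_scope + '/')[1]  # -> 'FlowNetC/conv1/weights'
    # 'FlowNetC/conv1/weights' is the name the variable is expected to have
    # inside ./checkpoints/FlowNetC/flownet-C.ckpt-0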
diff --git a/Codes/flownet2/src/ops/build/.gitkeep b/Codes/flownet2/src/ops/build/.gitkeep
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/ops/build/.gitkeep
diff --git a/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc
new file mode 100644
index 0000000..4e92f45
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc
@@ -0,0 +1,160 @@
+#define EIGEN_USE_THREADS
+
+#include "correlation_kernel.h"
+#include "pad.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+template<typename Device>
+class CorrelationGradKernel : public OpKernel {
+ public:
+ explicit CorrelationGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the attributes
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad));
+
+ OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd"));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images and verify their dimensions
+ const Tensor& gradients_t = ctx->input(0);
+ const Tensor& input_a_t = ctx->input(1);
+ const Tensor& input_b_t = ctx->input(2);
+
+ OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4"));
+ OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4"));
+
+ // Get dimensions of input
+ const int batch_size = input_a_t.dim_size(0);
+ const int in_height = input_a_t.dim_size(1);
+ const int in_width = input_a_t.dim_size(2);
+ const int in_channels = input_a_t.dim_size(3);
+ const int in_count_per_sample = in_height * in_width * in_channels;
+ const int padded_height = in_height + 2 * pad;
+ const int padded_width = in_width + 2 * pad;
+
+ // The size of the unreachable border region on each side
+ const int kernel_radius = (kernel_size - 1) / 2;
+ const int border_size = max_displacement + kernel_radius;
+
+ // Calculate the output dimensions
+ const int out_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1);
+ const int out_width = ceil((float)(padded_width - border_size * 2) / (float)stride_1);
+
+ const int neighborhood_grid_radius = max_displacement / stride_2;
+ const int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
+ const int out_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ // Allocate the memory for the outputs
+ Tensor *output_a_gradient_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_a_t.shape(), &output_a_gradient_t));
+ Tensor *output_b_gradient_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(1, input_b_t.shape(), &output_b_gradient_t));
+
+ // Get the tensors
+ auto gradients = gradients_t.tensor<float, 4>();
+ auto input_a = input_a_t.tensor<float, 4>();
+ auto input_b = input_b_t.tensor<float, 4>();
+ auto output_a_gradient = output_a_gradient_t->tensor<float, 4>();
+ auto output_b_gradient = output_b_gradient_t->tensor<float, 4>();
+
+ // Create temporary tensors for padded inputs
+ Tensor padded_input_a_t, padded_input_b_t;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, in_channels }),
+ &padded_input_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, in_channels }),
+ &padded_input_b_t));
+ auto padded_input_a = padded_input_a_t.tensor<float, 4>();
+ auto padded_input_b = padded_input_b_t.tensor<float, 4>();
+
+ // Pad the inputs
+ Pad(ctx->eigen_device<Device>(),
+ input_a.data(),
+ batch_size,
+ in_height,
+ in_width,
+ in_channels,
+ padded_height,
+ padded_width,
+ padded_input_a.data());
+ Pad(ctx->eigen_device<Device>(),
+ input_b.data(),
+ batch_size,
+ in_height,
+ in_width,
+ in_channels,
+ padded_height,
+ padded_width,
+ padded_input_b.data());
+
+ CorrelationGradA(ctx->eigen_gpu_device(),
+ batch_size,
+ out_width,
+ out_height,
+ out_channels,
+ max_displacement,
+ neighborhood_grid_radius,
+ neighborhood_grid_width,
+ kernel_radius,
+ stride_1,
+ stride_2,
+ in_width,
+ in_height,
+ padded_width,
+ padded_height,
+ in_channels,
+ in_count_per_sample,
+ pad,
+ padded_input_b.data(),
+ gradients.data(),
+ output_a_gradient.data());
+
+ CorrelationGradB(ctx->eigen_gpu_device(),
+ batch_size,
+ out_width,
+ out_height,
+ out_channels,
+ max_displacement,
+ neighborhood_grid_radius,
+ neighborhood_grid_width,
+ kernel_radius,
+ stride_1,
+ stride_2,
+ in_width,
+ in_height,
+ padded_width,
+ padded_height,
+ in_channels,
+ in_count_per_sample,
+ pad,
+ padded_input_a.data(),
+ gradients.data(),
+ output_b_gradient.data());
+ }
+
+ private:
+ int kernel_size;
+ int max_displacement;
+ int stride_1;
+ int stride_2;
+ int pad;
+};
+
+REGISTER_KERNEL_BUILDER(Name("CorrelationGrad")
+ .Device(DEVICE_GPU),
+ CorrelationGradKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc
new file mode 100644
index 0000000..19e3a40
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc
@@ -0,0 +1,262 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#define ROUND_OFF 50000
+
+#include <stdio.h>
+#include <iostream>
+
+#include "correlation_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void CorrelateDataBackward0(const int nthreads,
+ int item,
+ int out_width,
+ int out_height,
+ int out_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int stride_1,
+ int stride_2,
+ int in_width,
+ int in_height,
+ int padded_in_width,
+ int padded_in_height,
+ int in_channels,
+ int in_count_per_sample,
+ int pad_size,
+ float *output_a_gradient,
+ const float *input_b,
+ const float *gradient)
+{
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int k = index % in_channels; // channels
+ int x = (index / in_channels) % in_width + pad_size; // w-pos
+ int y = (index / in_channels / in_width) % in_height + pad_size; // h-pos
+
+ // Get X,Y ranges and clamp
+ // round_off is a trick to enable integer division with ceil, even for
+ // negative numbers
+ // We use a large offset so the inner part does not become negative.
+ const int round_off = ROUND_OFF;
+ const int round_off_s1 = stride_1 * round_off;
+
+ // We add round_off before the int division and subtract round_off after
+ // it, to ensure the formula matches ceil behavior:
+ int xmin = (x - 2 * kernel_radius - max_displacement + round_off_s1 - 1) / stride_1 + 1 -
+ round_off;
+ int ymin = (y - 2 * kernel_radius - max_displacement + round_off_s1 - 1) / stride_1 + 1 -
+ round_off;
+
+ // Same here:
+ int xmax = (x - max_displacement + round_off_s1) / stride_1 - round_off;
+ int ymax = (y - max_displacement + round_off_s1) / stride_1 - round_off;
+
+ float sum = 0;
+
+ if ((xmax >= 0) && (ymax >= 0) && (xmin <= out_width - 1) && (ymin <= out_height - 1)) {
+ xmin = max(0, xmin);
+ xmax = min(out_width - 1, xmax);
+
+ ymin = max(0, ymin);
+ ymax = min(out_height - 1, ymax);
+
+ for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+ for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+ // Get input_b data:
+ int s2o = stride_2 * o;
+ int s2p = stride_2 * p;
+ int idx_input_b = ((item * padded_in_height + (y + s2p)) * padded_in_width + (x + s2o)) *
+ in_channels + k;
+ float input_b_tmp = input_b[idx_input_b]; // input_b[x+s2o,y+s2p,k]
+
+ // Index offset for gradient in following loops:
+ int op = (p + neighborhood_grid_radius) * neighborhood_grid_width +
+ (o + neighborhood_grid_radius); // index [o,p]
+
+ for (int y = ymin; y <= ymax; y++) {
+ for (int x = xmin; x <= xmax; x++) {
+ // gradient[x,y,o,p]
+ int idx_gradient = ((item * out_height + y) * out_width + x) * out_channels + op;
+ sum += gradient[idx_gradient] * input_b_tmp;
+ }
+ }
+ }
+ }
+ }
+ const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * in_channels;
+ const int input_a_idx = ((y - pad_size) * in_width + (x - pad_size)) * in_channels + k;
+ output_a_gradient[input_a_idx + item * in_count_per_sample] = sum / (float)sumelems;
+ }
+}
+
+__global__ void CorrelateDataBackward1(const int nthreads,
+ int item,
+ int out_width,
+ int out_height,
+ int out_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int stride_1,
+ int stride_2,
+ int in_width,
+ int in_height,
+ int padded_in_width,
+ int padded_in_height,
+ int in_channels,
+ int in_count_per_sample,
+ int pad_size,
+ float *output_b_gradient,
+ const float *input_a,
+ const float *gradient)
+{
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int k = index % in_channels; // channels
+ int x = (index / in_channels) % in_width + pad_size; // w-pos
+ int y = (index / in_channels / in_width) % in_height + pad_size; // h-pos
+
+ // round_off is a trick to enable integer division with ceil, even for
+ // negative numbers
+ // We use a large offset so the inner part does not become negative.
+ const int round_off = ROUND_OFF;
+ const int round_off_s1 = stride_1 * round_off;
+
+ float sum = 0;
+
+ // Height (y)
+ for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+ // Width (x)
+ for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+ int s2o = stride_2 * o;
+ int s2p = stride_2 * p;
+
+ // Get X,Y ranges and clamp
+ // We add round_off before the int division and subtract round_off
+ // after it, to ensure the formula matches ceil behavior:
+ int xmin = (x - 2 * kernel_radius - max_displacement - s2o + round_off_s1 - 1) / stride_1 +
+ 1 - round_off;
+ int ymin = (y - 2 * kernel_radius - max_displacement - s2p + round_off_s1 - 1) / stride_1 +
+ 1 - round_off;
+
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+
+ // Same here:
+ int xmax = (x - max_displacement - s2o + round_off_s1) / stride_1 - round_off;
+ int ymax = (y - max_displacement - s2p + round_off_s1) / stride_1 - round_off;
+
+ if ((xmax >= 0) && (ymax >= 0) && (xmin <= out_width - 1) && (ymin <= out_height - 1)) {
+ xmin = max(0, xmin);
+ xmax = min(out_width - 1, xmax);
+
+ ymin = max(0, ymin);
+ ymax = min(out_height - 1, ymax);
+
+ // Get input_a data:
+ int idx_input_a = ((item * padded_in_height + (y - s2p)) * padded_in_width + (x - s2o)) *
+ in_channels + k;
+ float input_a_tmp = input_a[idx_input_a];
+
+ // Index offset for gradient in following loops:
+ int op = (p + neighborhood_grid_radius) * neighborhood_grid_width +
+ (o + neighborhood_grid_radius); // index [o,p]
+
+ for (int y = ymin; y <= ymax; y++) {
+ for (int x = xmin; x <= xmax; x++) {
+ int idx_gradient = ((item * out_height + y) * out_width + x) * out_channels + op;
+ sum += gradient[idx_gradient] * input_a_tmp;
+ }
+ }
+ }
+ }
+ }
+ const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * in_channels;
+ const int input_b_idx = ((y - pad_size) * in_width + (x - pad_size)) * in_channels + k;
+ output_b_gradient[input_b_idx + item * in_count_per_sample] = sum / (float)sumelems;
+ }
+}
+
+void CorrelationGradA(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample, // h * w * ch
+ const int pad,
+ const float *input_b,
+ const float *gradient,
+ float *output_a_gradient) {
+ CudaLaunchConfig config = GetCudaLaunchConfig(in_count_per_sample, device);
+
+ for (int n = 0; n < batch_size; n++) {
+ CorrelateDataBackward0 << < config.block_count, config.thread_per_block, 0,
+ device.stream() >> > (
+ in_count_per_sample,
+ n, out_width, out_height, out_channels,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
+ stride_1, stride_2,
+ in_width, in_height, padded_in_width, padded_in_height, in_channels, in_count_per_sample, pad,
+ output_a_gradient, input_b, gradient);
+ }
+}
+
+void CorrelationGradB(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample,
+ const int pad,
+ const float *input_a,
+ const float *gradient,
+ float *output_b_gradient) {
+ CudaLaunchConfig config = GetCudaLaunchConfig(in_count_per_sample, device);
+
+ for (int n = 0; n < batch_size; n++) {
+ CorrelateDataBackward1 << < config.block_count, config.thread_per_block, 0,
+ device.stream() >> > (
+ in_count_per_sample,
+ n, out_width, out_height, out_channels,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
+ stride_1, stride_2,
+ in_width, in_height, padded_in_width, padded_in_height, in_channels, in_count_per_sample, pad,
+ output_b_gradient, input_a, gradient);
+ }
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
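Note on the ROUND_OFF trick in both backward kernels: C integer division truncates toward zero, so the usual (a + b - 1) / b ceiling formula breaks for negative numerators. Adding stride * ROUND_OFF first keeps the numerator positive, and subtracting ROUND_OFF afterwards undoes the shift. A small Python sketch of the xmin/ymin formula (the helper itself is illustrative; only ROUND_OFF and the shift mirror the kernel code):

    ROUND_OFF = 50000

    def ceil_div(value, stride):
        # Smallest integer q with q * stride >= value, correct for value < 0 as well.
        round_off_s1 = stride * ROUND_OFF
        return (value + round_off_s1 - 1) // stride + 1 - ROUND_OFF

    assert ceil_div(-3, 2) == -1   # ceil(-1.5) == -1
    assert ceil_div(4, 2) == 2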
diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.cc b/Codes/flownet2/src/ops/correlation/correlation_kernel.cc
new file mode 100644
index 0000000..f8a5193
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.cc
@@ -0,0 +1,137 @@
+#define EIGEN_USE_THREADS
+
+#include <utility>
+
+#include "correlation_kernel.h"
+#include "pad.h"
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+template<typename Device>
+class CorrelationKernel : public OpKernel {
+ public:
+ explicit CorrelationKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the attributes
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad));
+
+ OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd"));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images and transforms and verify their dimensions
+ const Tensor& input_a_t = ctx->input(0);
+ const Tensor& input_b_t = ctx->input(1);
+
+ OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4"));
+ OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4"));
+
+ // Get dimensions of input (already padded)
+ int batch_size = input_a_t.dim_size(0);
+ int input_height = input_a_t.dim_size(1);
+ int input_width = input_a_t.dim_size(2);
+ int input_channels = input_a_t.dim_size(3);
+ int padded_height = input_height + 2 * pad;
+ int padded_width = input_width + 2 * pad;
+
+ // The size of the unreachable border region on each side
+ int kernel_radius = (kernel_size - 1) / 2;
+ int border_size = max_displacement + kernel_radius;
+
+ // Calculate the output dimensions
+ int output_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1);
+ int output_width = ceil((float)(padded_width - border_size * 2) / (float)stride_1);
+
+ OP_REQUIRES(ctx, output_height >= 1,
+ errors::InvalidArgument("Neighborhood and kernel don't fit in input height."));
+ OP_REQUIRES(ctx, output_width >= 1,
+ errors::InvalidArgument("Neighborhood and kernel don't fit in input width."));
+
+ int neighborhood_grid_radius = max_displacement / stride_2;
+ int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
+ int output_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ // Allocate the memory for the output
+ Tensor *output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(
+ 0,
+ TensorShape({ batch_size, output_height, output_width, output_channels }),
+ &output_t));
+
+ // Get the tensors
+ auto input_a = input_a_t.tensor<float, 4>();
+ auto input_b = input_b_t.tensor<float, 4>();
+ auto output = output_t->tensor<float, 4>();
+
+ // Create temporary tensors for padded inputs
+ Tensor padded_input_a_t, padded_input_b_t;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, input_channels }),
+ &padded_input_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, input_channels }),
+ &padded_input_b_t));
+ auto padded_input_a = padded_input_a_t.tensor<float, 4>();
+ auto padded_input_b = padded_input_b_t.tensor<float, 4>();
+
+ // Pad the inputs
+ Pad(ctx->eigen_device<Device>(),
+ input_a.data(),
+ batch_size,
+ input_height,
+ input_width,
+ input_channels,
+ padded_height,
+ padded_width,
+ padded_input_a.data());
+ Pad(ctx->eigen_device<Device>(),
+ input_b.data(),
+ batch_size,
+ input_height,
+ input_width,
+ input_channels,
+ padded_height,
+ padded_width,
+ padded_input_b.data());
+
+ // Perform cross correlation
+ Correlation(ctx->eigen_device<Device>(),
+ padded_input_a.data(),
+ padded_input_b.data(),
+ batch_size,
+ output_height,
+ output_width,
+ output_channels,
+ output_height * output_width * output_channels,
+ padded_height,
+ padded_width,
+ input_channels,
+ max_displacement,
+ neighborhood_grid_radius,
+ neighborhood_grid_width,
+ kernel_radius,
+ kernel_size,
+ stride_1,
+ stride_2,
+ output.data());
+ }
+
+ private:
+ int kernel_size;
+ int max_displacement;
+ int stride_1;
+ int stride_2;
+ int pad;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Correlation")
+ .Device(DEVICE_GPU),
+ CorrelationKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc b/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc
new file mode 100644
index 0000000..c63e489
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc
@@ -0,0 +1,153 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#define WARPS_PER_BLOCK 1
+#define THREADS_PER_WARP 32
+
+#include <stdio.h>
+#include <iostream>
+
+#include "correlation_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void CorrelateData(int batch_size,
+ int out_width,
+ int out_height,
+ int out_channels,
+ int out_count,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int kernel_size,
+ int stride_1,
+ int stride_2,
+ int in_width_padded,
+ int in_height_padded,
+ int in_channels,
+ const float *input_a,
+ const float *input_b,
+ float *output) {
+ extern __shared__ char patch_data_char[];
+
+ float *patch_data = (float *)patch_data_char;
+
+ // First (upper left) position of kernel upper-left corner in current center
+ // position of neighborhood in image 1
+ int x1 = blockIdx.x * stride_1 + max_displacement;
+ int y1 = blockIdx.y * stride_1 + max_displacement;
+ int item = blockIdx.z;
+ int ch_off = threadIdx.x;
+
+ // Load 3D patch into shared memory
+ // HEIGHT
+ for (int j = 0; j < kernel_size; j++) {
+ // WIDTH
+ for (int i = 0; i < kernel_size; i++) {
+ int ji_off = ((j * kernel_size) + i) * in_channels;
+
+ // CHANNELS
+ for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) {
+ int idx1 = ((item * in_height_padded + y1 + j) * in_width_padded + x1 + i) *
+ in_channels + ch;
+ int idxPatchData = ji_off + ch;
+ patch_data[idxPatchData] = input_a[idx1];
+ }
+ }
+ }
+
+ __syncthreads();
+
+ __shared__ float sum[WARPS_PER_BLOCK * THREADS_PER_WARP];
+
+ // Compute correlation
+ for (int out_channel = 0; out_channel < out_channels; out_channel++) {
+ sum[ch_off] = 0;
+
+ int s2o = (out_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
+ int s2p = (out_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
+ int x2 = x1 + s2o;
+ int y2 = y1 + s2p;
+
+ // HEIGHT
+ for (int j = 0; j < kernel_size; j++) {
+ // WIDTH
+ for (int i = 0; i < kernel_size; i++) {
+ int ji_off = ((j * kernel_size) + i) * in_channels;
+
+ // CHANNELS
+ for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) {
+ int idxPatchData = ji_off + ch;
+ int idx2 = ((item * in_height_padded + y2 + j) * in_width_padded + x2 + i) *
+ in_channels + ch;
+
+ sum[ch_off] += patch_data[idxPatchData] * input_b[idx2];
+ }
+ }
+ }
+
+ __syncthreads();
+
+ if (ch_off == 0) {
+ float total_sum = 0;
+
+ for (int idx = 0; idx < WARPS_PER_BLOCK * THREADS_PER_WARP; idx++) {
+ total_sum += sum[idx];
+ }
+ const int sumelems = kernel_size * kernel_size * in_channels;
+ const int index = (blockIdx.y * out_width + blockIdx.x) * out_channels + out_channel;
+
+ /* from Caffe: const int index = ((out_channel * out_height +
+ blockIdx.y) * out_width) + blockIdx.x; */
+ output[index + item * out_count] = total_sum / (float)sumelems;
+
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ // n = 0
+ // caffe: ((k * H + h) * W + w) + n * K * H * W
+ // tf: (h * W + w) * K + k + n * H * W * K
+ }
+ }
+}
+
+void Correlation(const GPUDevice& device,
+ const float *input_a,
+ const float *input_b,
+ const int batch_size,
+ const int out_height,
+ const int out_width,
+ const int out_channels,
+ const int out_count,
+ const int in_height_padded,
+ const int in_width_padded,
+ const int in_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int kernel_size,
+ int stride_1,
+ int stride_2,
+ float *output) {
+ dim3 totalBlocksCorr(out_width, out_height, batch_size);
+ dim3 threadsPerBlock(THREADS_PER_WARP * WARPS_PER_BLOCK);
+ const int shared_memory_per_block = (kernel_size * kernel_size) * in_channels;
+
+ CorrelateData << < totalBlocksCorr, threadsPerBlock, shared_memory_per_block * sizeof(float),
+ device.stream() >> > (
+ batch_size, out_width, out_height, out_channels, out_count,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
+ kernel_size, stride_1, stride_2, in_width_padded, in_height_padded, in_channels,
+ input_a, input_b, output);
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
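Note on the NKHW/NHWK comments in CorrelateData: Caffe stores activations channel-major while TensorFlow stores them channel-minor, so the same (n, k, h, w) element sits at a different flat offset. A tiny sketch of the two index formulas quoted in the comments (function names are illustrative):

    def caffe_index(n, k, h, w, K, H, W):
        # NKHW (i.e. NCHW): ((n * K + k) * H + h) * W + w
        return ((n * K + k) * H + h) * W + w

    def tf_index(n, h, w, k, K, H, W):
        # NHWK (i.e. NHWC): ((n * H + h) * W + w) * K + k
        return ((n * H + h) * W + w) * K + k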
diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.h b/Codes/flownet2/src/ops/correlation/correlation_kernel.h
new file mode 100644
index 0000000..a1dfb62
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.h
@@ -0,0 +1,77 @@
+#ifndef FLOWNET_CORRELATION_H_
+#define FLOWNET_CORRELATION_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+void Correlation(const GPUDevice& device,
+ const float *input_a,
+ const float *input_b,
+ const int batch_size,
+ const int out_height,
+ const int out_width,
+ const int out_channels,
+ const int out_count,
+ const int in_height_padded,
+ const int in_width_padded,
+ const int in_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int kernel_size,
+ int stride_1,
+ int stride_2,
+ float *output);
+
+
+void CorrelationGradA(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample,
+ const int pad,
+ const float *input_b,
+ const float *gradient,
+ float *output_a_gradient);
+
+void CorrelationGradB(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample,
+ const int pad,
+ const float *input_a,
+ const float *gradient,
+ float *output_b_gradient);
+} // end namespace tensorflow
+
+#endif // FLOWNET_CORRELATION_H_
diff --git a/Codes/flownet2/src/ops/correlation/correlation_op.cc b/Codes/flownet2/src/ops/correlation/correlation_op.cc
new file mode 100644
index 0000000..4f420f0
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_op.cc
@@ -0,0 +1,83 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+Status SetOutput(InferenceContext *c) {
+ ShapeHandle input_a, input_b, input;
+
+ // Get shapes of both inputs and verify they are rank 4
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_a));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_b));
+
+ // Verify inputs are same dimensions
+ TF_RETURN_IF_ERROR(c->Merge(input_a, input_b, &input));
+
+ // Get the attributes
+ int kernel_size, max_displacement, stride_1, stride_2, pad;
+ TF_RETURN_IF_ERROR(c->GetAttr("kernel_size", &kernel_size));
+ TF_RETURN_IF_ERROR(c->GetAttr("max_displacement", &max_displacement));
+ TF_RETURN_IF_ERROR(c->GetAttr("stride_1", &stride_1));
+ TF_RETURN_IF_ERROR(c->GetAttr("stride_2", &stride_2));
+ TF_RETURN_IF_ERROR(c->GetAttr("pad", &pad));
+
+ // Get dimensions of input (already padded)
+ int64 batch = c->Value(c->Dim(input, 0));
+ int64 input_height = c->Value(c->Dim(input, 1));
+ int64 input_width = c->Value(c->Dim(input, 2));
+ int64 padded_height = input_height + 2 * pad;
+ int64 padded_width = input_width + 2 * pad;
+
+ // The size of the unreachable border region on each side
+ int kernel_radius = (kernel_size - 1) / 2;
+ int border_size = max_displacement + kernel_radius;
+
+ // Calculate the output dimensions
+ int64 output_height = (int64)ceil((float)(padded_height - border_size * 2) / (float)stride_1);
+ int64 output_width = (int64)ceil((float)(padded_width - border_size * 2) / (float)stride_1);
+
+ // TODO: Verify output size >= 1
+
+ int neighborhood_grid_radius = max_displacement / stride_2;
+ int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
+ int64 output_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ // Set output shape
+ c->set_output(0, c->MakeShape({ batch, output_height, output_width, output_channels }));
+ return Status::OK();
+}
+
+REGISTER_OP("Correlation")
+.Input("input_a: float32")
+.Input("input_b: float32")
+.Attr("kernel_size: int")
+.Attr("max_displacement: int")
+.Attr("stride_1: int")
+.Attr("stride_2: int")
+.Attr("pad: int")
+.Output("output: float32")
+.SetShapeFn(SetOutput);
+
+REGISTER_OP("CorrelationGrad")
+.Input("gradients: float32")
+.Input("input_a: float32")
+.Input("input_b: float32")
+.Attr("kernel_size: int")
+.Attr("max_displacement: int")
+.Attr("stride_1: int")
+.Attr("stride_2: int")
+.Attr("pad: int")
+.Output("backprops_a: float32")
+.Output("backprops_b: float32")
+.SetShapeFn([](InferenceContext *c) {
+ // Output gradients should be the same dimensions as the inputs
+ ShapeHandle out;
+ TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->input(2), &out));
+ c->set_output(0, out);
+ c->set_output(1, out);
+ return Status::OK();
+ });
+} // namespace tensorflow
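Note: the output shape computed in SetOutput can be checked by hand. A short Python sketch of the same arithmetic; the example parameter values are the ones commonly used for FlowNetC-style correlation and are an assumption, not taken from this diff:

    import math

    def correlation_output_shape(height, width, kernel_size, max_displacement,
                                 stride_1, stride_2, pad):
        kernel_radius = (kernel_size - 1) // 2
        border_size = max_displacement + kernel_radius
        padded_h, padded_w = height + 2 * pad, width + 2 * pad
        out_h = math.ceil((padded_h - 2 * border_size) / stride_1)
        out_w = math.ceil((padded_w - 2 * border_size) / stride_1)
        grid_width = 2 * (max_displacement // stride_2) + 1
        return out_h, out_w, grid_width * grid_width

    # A 48x64 feature map with kernel_size=1, max_displacement=20,
    # stride_1=1, stride_2=2, pad=20 maps to (48, 64, 441).
    print(correlation_output_shape(48, 64, 1, 20, 1, 2, 20))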
diff --git a/Codes/flownet2/src/ops/correlation/pad.cu.cc b/Codes/flownet2/src/ops/correlation/pad.cu.cc
new file mode 100644
index 0000000..0b6c93d
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/pad.cu.cc
@@ -0,0 +1,76 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "pad.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void PadData(
+ const float *in,
+ int in_widthheight,
+ int in_width,
+ int in_height,
+ int out_width,
+ int out_height,
+ int channels,
+ int padding,
+ float *out) {
+ int xy = blockIdx.x * blockDim.x + threadIdx.x;
+
+ int x = xy % in_width;
+ int y = xy / in_width;
+ int ch = blockIdx.y;
+ int n = blockIdx.z;
+
+ if (xy >= in_widthheight) {
+ out[((n * out_height + y) * out_width + x) * channels + ch] = 0.0;
+ return;
+ }
+
+ float value = in[((n * in_height + y) * in_width + x) * channels + ch];
+
+ __syncthreads();
+
+ int xpad = x + padding;
+ int ypad = y + padding;
+
+ out[((n * out_height + ypad) * out_width + xpad) * channels + ch] = value;
+}
+
+void Pad(const GPUDevice& device,
+ const float *input,
+ int batch_size,
+ int input_height,
+ int input_width,
+ int input_channels,
+ int output_height,
+ int output_width,
+ float *output) {
+ int in_widthheight = input_width * input_height;
+ int threads_per_block = 16;
+ dim3 totalBlocks((in_widthheight - 1) / threads_per_block + 1, input_channels, batch_size);
+
+ cudaMemset(output, 0, batch_size * output_height * output_width * input_channels * sizeof(float));
+
+ int padding = (output_height - input_height) / 2;
+
+ // LAUNCH KERNEL
+ PadData << < totalBlocks, threads_per_block, 0, device.stream() >> > (
+ input,
+ in_widthheight,
+ input_width,
+ input_height,
+ output_width,
+ output_height,
+ input_channels,
+ padding,
+ output);
+}
+}
+#endif // if GOOGLE_CUDA
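Note: the Pad launcher above is a plain zero-pad of padding pixels on each spatial side of an NHWC tensor; in NumPy terms it is assumed to be equivalent to:

    import numpy as np

    def pad_like_kernel(batch_nhwc, padding):
        # Zero-pad height and width only, leaving batch and channels untouched.
        return np.pad(batch_nhwc,
                      ((0, 0), (padding, padding), (padding, padding), (0, 0)),
                      mode='constant')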
diff --git a/Codes/flownet2/src/ops/correlation/pad.h b/Codes/flownet2/src/ops/correlation/pad.h
new file mode 100644
index 0000000..afb4df0
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/pad.h
@@ -0,0 +1,20 @@
+#ifndef FLOWNET_PAD_H_
+#define FLOWNET_PAD_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+void Pad(const GPUDevice& device,
+ const float *input,
+ int batch_size,
+ int input_height,
+ int input_width,
+ int input_channels,
+ int output_height,
+ int output_width,
+ float *output);
+} // end namespace tensorflow
+
+#endif // ifndef FLOWNET_PAD_H_
diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel.cc b/Codes/flownet2/src/ops/downsample/downsample_kernel.cc
new file mode 100644
index 0000000..eefe247
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_kernel.cc
@@ -0,0 +1,47 @@
+#define EIGEN_USE_THREADS
+
+#include "downsample_kernel.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename Device>
+class DownsampleKernel : public OpKernel {
+ public:
+ explicit DownsampleKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {
+ // Get the size [height, width] tensor and verify its dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("size", &size_));
+ OP_REQUIRES(ctx, size_.size() == 2, errors::InvalidArgument("size must be 2 dimensions"));
+ }
+
+ void Compute(OpKernelContext* ctx) override {
+ // Get the input images and transforms and verify their dimensions
+ const Tensor& input_t = ctx->input(0);
+ OP_REQUIRES(ctx, input_t.dims() == 4,
+ errors::InvalidArgument("Input images must have rank 4"));
+
+ // Allocate the memory for the output
+ Tensor* output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(
+ 0, TensorShape({input_t.dim_size(0), size_[0], size_[1], input_t.dim_size(3)}), &output_t));
+
+    // Perform the downsampling
+ auto input = input_t.tensor<float, 4>();
+ auto output = output_t->tensor<float, 4>();
+
+ Downsample(ctx->eigen_gpu_device(), input, output);
+ }
+
+ private:
+ std::vector<int32> size_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Downsample")
+ .Device(DEVICE_GPU),
+ DownsampleKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel.h b/Codes/flownet2/src/ops/downsample/downsample_kernel.h
new file mode 100644
index 0000000..bcc4e3f
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_kernel.h
@@ -0,0 +1,18 @@
+#ifndef FLOWNET_DOWNSAMPLE_H_
+#define FLOWNET_DOWNSAMPLE_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+bool Downsample(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::Tensor output);
+
+} // end namespace tensorflow
+
+#endif // FLOWNET_DOWNSAMPLE_H_
diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc b/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc
new file mode 100644
index 0000000..b7629a0
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc
@@ -0,0 +1,108 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "downsample_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+#define CUDART_NAN_F __int_as_float(0x7fffffff)
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void DownsampleKernel(
+ const int32 nthreads,
+ const float* input_ptr,
+ float* output_ptr,
+ const int in_width,
+ const int in_height,
+ const int out_width,
+ const int out_height,
+ const int channels,
+ const float width_scale,
+ const float height_scale,
+ const int wradius,
+ const int hradius) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ const int c = index % channels;
+ const int destx = (index / channels) % out_width;
+ const int desty = (index / channels / out_width) % out_height;
+ const int n = (index / channels / out_width) / out_height;
+
+ const float srcx = ((float)destx / (float)(out_width - 1)) * (float)(in_width - 1);
+ const float srcy = ((float)desty / (float)(out_height - 1)) * (float)(in_height - 1);
+
+ const int isrcx = round(srcx);
+ const int isrcy = round(srcy);
+
+ float accum_value = 0;
+ float accum_weight = 0;
+ float accum_nan = 0;
+
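+    // Weighted average over the source neighborhood using a tent (bilinear)
+    // filter whose support matches the scale factor; NaN samples are tracked
+    // separately so mostly-NaN neighborhoods propagate NaN to the output.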
+ for (int dy = -hradius; dy <= hradius; dy++) {
+ int yoff = isrcy + dy;
+ //
+ for (int dx = -wradius; dx <= wradius; dx++) {
+ int xoff = isrcx + dx;
+
+ if (xoff >= 0 && yoff >= 0 && xoff < in_width && yoff < in_height) {
+ int idx = ((n * in_height + yoff) * in_width + xoff) * channels + c;
+ float sample = input_ptr[idx];
+ float weight = fmaxf(0.0f, 1.0f - (fabsf((float)xoff - srcx) / width_scale))
+ * fmaxf(0.0f, 1.0f - (fabsf((float)yoff - srcy) / height_scale));
+ if (sample != sample) { // isnan
+ accum_nan += weight;
+ sample = 0;
+ weight = 0;
+ }
+ accum_value += sample * weight;
+ accum_weight += weight;
+ }
+ }
+ }
+
+ if (accum_nan / accum_weight > 0.5) {
+ output_ptr[index] = CUDART_NAN_F;
+ } else {
+ output_ptr[index] = accum_value / accum_weight;
+ }
+ }
+}
+
+bool Downsample(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::Tensor output) {
+ const int batch_size = output.dimension(0);
+ const int out_height = output.dimension(1);
+ const int out_width = output.dimension(2);
+ const int out_channels = output.dimension(3);
+ const int total_count = batch_size * out_height * out_width * out_channels;
+
+ const int in_height = input.dimension(1);
+ const int in_width = input.dimension(2);
+
+ const float width_scale = (float)(in_width - 1) / (float)(out_width - 1);
+ const float height_scale = (float)(in_height - 1) / (float)(out_height - 1);
+
+ const int wradius = ceil(width_scale);
+ const int hradius = ceil(height_scale);
+
+ CudaLaunchConfig config = GetCudaLaunchConfig(total_count, device);
+ DownsampleKernel<<<config.block_count, config.thread_per_block, 0,
+ device.stream()>>>(total_count, input.data(), output.data(),
+ in_width, in_height, out_width, out_height, out_channels,
+ width_scale, height_scale, wradius, hradius);
+ return device.ok();
+}
+
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/downsample/downsample_op.cc b/Codes/flownet2/src/ops/downsample/downsample_op.cc
new file mode 100644
index 0000000..6980dc7
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_op.cc
@@ -0,0 +1,30 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::DimensionHandle;
+
+Status SetOutputToSizedImage(InferenceContext* c) {
+ ShapeHandle input;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+ DimensionHandle batch = c->Dim(input, 0);
+ DimensionHandle depth = c->Dim(input, 3);
+ std::vector<int32> size_;
+  TF_RETURN_IF_ERROR(c->GetAttr("size", &size_));
+ DimensionHandle height = c->MakeDim(size_[0]);
+ DimensionHandle width = c->MakeDim(size_[1]);
+ c->set_output(0, c->MakeShape({batch, height, width, depth}));
+ return Status::OK();
+}
+
+REGISTER_OP("Downsample")
+ .Input("input: float32")
+ .Attr("size: list(int) >= 2")
+ .Output("output: float32")
+ .SetShapeFn(SetOutputToSizedImage);
+
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp.cc
new file mode 100644
index 0000000..b5d9602
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.cc
@@ -0,0 +1,48 @@
+#define EIGEN_USE_THREADS
+
+#include "flow_warp.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+template<typename Device>
+class FlowWarpKernel : public OpKernel {
+ public:
+ explicit FlowWarpKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {}
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input image and flow and verify dimensions
+ const Tensor& input_t = ctx->input(0);
+ const Tensor& flow_t = ctx->input(1);
+
+ OP_REQUIRES(ctx, input_t.dims() == 4,
+ errors::InvalidArgument("Input image must have rank 4"));
+ OP_REQUIRES(ctx, flow_t.dims() == 4,
+ errors::InvalidArgument("Input flow must have rank 4"));
+ OP_REQUIRES(ctx,
+ input_t.dim_size(0) == flow_t.dim_size(0) && input_t.dim_size(
+ 1) == flow_t.dim_size(1) && input_t.dim_size(2) == flow_t.dim_size(2),
+ errors::InvalidArgument(
+ "Input image and flow must have same N x H x W dimensions"));
+
+ // Allocate the memory for the output
+ Tensor *output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_t.shape(), &output_t));
+
+    // Perform the flow warp
+ auto input = input_t.tensor<float, 4>();
+ auto flow = flow_t.tensor<float, 4>();
+ auto output = output_t->tensor<float, 4>();
+
+ FlowWarp(ctx->eigen_gpu_device(), input, flow, output);
+ }
+};
+
+REGISTER_KERNEL_BUILDER(Name("FlowWarp")
+ .Device(DEVICE_GPU),
+ FlowWarpKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc
new file mode 100644
index 0000000..2007151
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc
@@ -0,0 +1,130 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "flow_warp.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+#define RA_TILE 32
+#define RA_ROWS 8
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void FlowWarpKernel(
+ const float *image,
+ const float *flow,
+ float *warped,
+ const int batch_size,
+ const int channels,
+ const int cblocks,
+ const int width,
+ const int wblocks,
+ const int height,
+ const int width_height) {
+ int y = blockIdx.y;
+ int n = blockIdx.z;
+
+ __shared__ float x2_buf[FW_TILE_X], y2_buf[FW_TILE_X];
+ __shared__ float buffer[FW_TILE_C][FW_TILE_X + 1];
+
+ int x;
+ int c;
+
+ x = blockIdx.x * FW_TILE_X + threadIdx.x;
+
+ if ((threadIdx.y == 0) && (x < width)) {
+ const int idx = ((n * height + y) * width + x) * 2;
+ x2_buf[threadIdx.x] = float(x) + flow[idx];
+ y2_buf[threadIdx.x] = float(y) + flow[idx + 1];
+ }
+
+ __syncthreads();
+
+ float x2 = x2_buf[threadIdx.y];
+ float y2 = y2_buf[threadIdx.y];
+
+ int ix2_L = int(x2);
+ int iy2_T = int(y2);
+ int ix2_R = min(ix2_L + 1, width - 1);
+ int iy2_B = min(iy2_T + 1, height - 1);
+
+ int off_TL = ((n * height + iy2_T) * width + ix2_L) * channels;
+ int off_TR = ((n * height + iy2_T) * width + ix2_R) * channels;
+ int off_BL = ((n * height + iy2_B) * width + ix2_L) * channels;
+ int off_BR = ((n * height + iy2_B) * width + ix2_R) * channels;
+
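+  // Bilinear interpolation weights: alpha and beta are the fractional offsets
+  // of the warped position inside its top-left source cell.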
+ float alpha = x2 - ix2_L;
+ float beta = y2 - iy2_T;
+ float coeffTL = (1 - alpha) * (1 - beta);
+ float coeffTR = alpha * (1 - beta);
+ float coeffBL = (1 - alpha) * beta;
+ float coeffBR = alpha * beta;
+
+ for (int cb = 0; cb < cblocks; cb++) {
+ __syncthreads();
+
+ buffer[threadIdx.y][threadIdx.x] = 0.0;
+
+ __syncthreads();
+
+ c = cb * FW_TILE_C + threadIdx.x;
+
+ if ((x2 >= 0) && (y2 >= 0) && (x2 < width) && (y2 < height) && (c < channels)) {
+ buffer[threadIdx.y][threadIdx.x] = // buffer [x][c]
+ coeffTL * image[off_TL + c] +
+ coeffTR * image[off_TR + c] +
+ coeffBL * image[off_BL + c] +
+ coeffBR * image[off_BR + c];
+ }
+
+ __syncthreads();
+
+ c = cb * FW_TILE_C + threadIdx.y;
+ x = blockIdx.x * FW_TILE_X + threadIdx.x;
+
+ if ((c < channels) && (x < width)) {
+ warped[((n * height + y) * width + x) * channels + c] = buffer[threadIdx.x][threadIdx.y];
+ }
+ }
+}
+
+void FlowWarp(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::Tensor output) {
+ const int batch_size = input.dimension(0);
+ const int height = input.dimension(1);
+ const int width = input.dimension(2);
+ const int channels = input.dimension(3);
+
+ const int width_height = width * height;
+ int wblocks = ((width - 1) / FW_TILE_X + 1);
+ int cblocks = ((channels - 1) / FW_TILE_C + 1);
+ dim3 warpThreads(FW_TILE_X, FW_TILE_C);
+ dim3 warpBlocks(wblocks, height, batch_size);
+
+  cudaMemset(output.data(), 0, batch_size * height * width * channels * sizeof(float));
+
+ FlowWarpKernel << < warpBlocks, warpThreads, 0, device.stream() >> > (
+ input.data(),
+ flow.data(),
+ output.data(),
+ batch_size,
+ channels,
+ cblocks,
+ width,
+ wblocks,
+ height,
+ width_height);
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.h b/Codes/flownet2/src/ops/flow_warp/flow_warp.h
new file mode 100644
index 0000000..2780316
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.h
@@ -0,0 +1,28 @@
+#ifndef FLOWNET_FLOWWARP_H_
+#define FLOWNET_FLOWWARP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+#define FW_THREADS 32
+#define FW_TILE_X FW_THREADS
+#define FW_TILE_C FW_THREADS
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+void FlowWarp(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::Tensor output);
+
+void FlowWarpGrad(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor image,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::ConstTensor gradient,
+ typename TTypes<float, 4>::Tensor image_grad,
+ typename TTypes<float, 4>::Tensor flow_grad);
+} // end namespace tensorflow
+
+#endif // FLOWNET_FLOWWARP_H_
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc
new file mode 100644
index 0000000..9f3e7ea
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc
@@ -0,0 +1,57 @@
+#define EIGEN_USE_THREADS
+
+#include "flow_warp.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+template<typename Device>
+class FlowWarpGradKernel : public OpKernel {
+ public:
+ explicit FlowWarpGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {}
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input image and flow and verify dimensions
+ const Tensor& image_t = ctx->input(0);
+ const Tensor& flow_t = ctx->input(1);
+ const Tensor& grad_t = ctx->input(2);
+
+ OP_REQUIRES(ctx, image_t.dims() == 4,
+ errors::InvalidArgument("Input image must have rank 4"));
+ OP_REQUIRES(ctx, flow_t.dims() == 4,
+ errors::InvalidArgument("Input flow must have rank 4"));
+ OP_REQUIRES(ctx,
+ image_t.dim_size(0) == flow_t.dim_size(0) && image_t.dim_size(
+ 1) == flow_t.dim_size(1) && image_t.dim_size(2) == flow_t.dim_size(2),
+ errors::InvalidArgument(
+ "Input image and flow must have same N x H x W dimensions"));
+
+ // Allocate the memory for the output
+ Tensor *image_grad_t;
+ Tensor *flow_grad_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, image_t.shape(), &image_grad_t));
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(1, flow_t.shape(), &flow_grad_t));
+
+ auto image = image_t.tensor<float, 4>();
+ auto flow = flow_t.tensor<float, 4>();
+ auto gradient = grad_t.tensor<float, 4>();
+ auto image_grad = image_grad_t->tensor<float, 4>();
+ auto flow_grad = flow_grad_t->tensor<float, 4>();
+
+ FlowWarpGrad(ctx->eigen_gpu_device(),
+ image,
+ flow,
+ gradient,
+ image_grad,
+ flow_grad);
+ }
+};
+
+REGISTER_KERNEL_BUILDER(Name("FlowWarpGrad")
+ .Device(DEVICE_GPU),
+ FlowWarpGradKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc
new file mode 100644
index 0000000..25248c8
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc
@@ -0,0 +1,126 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "flow_warp.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void FlowWarpGradKernel(
+ const float *image,
+ float *image_grad,
+ const float *flow,
+ float *flow_grad,
+ const float *gradient,
+ int batch_size,
+ int channels,
+ int cblocks,
+ int width,
+ int wblocks,
+ int height,
+ int widthheight) {
+ int x = blockIdx.x * FW_TILE_X + threadIdx.x;
+
+ if (x >= width) return;
+
+ int y = blockIdx.y;
+ int n = blockIdx.z;
+
+ const int flow_idx = ((n * height + y) * width + x) * 2;
+ float x2 = float(x) + flow[flow_idx];
+ float y2 = float(y) + flow[flow_idx + 1];
+
+ if ((x2 >= 0.f) && (y2 >= 0.f) && (x2 < width) && (y2 < height)) {
+ int ix2_L = int(x2);
+ int iy2_T = int(y2);
+ int ix2_R = min(ix2_L + 1, width - 1);
+ int iy2_B = min(iy2_T + 1, height - 1);
+
+ float alpha = x2 - ix2_L;
+ float beta = y2 - iy2_T;
+
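+    // Image gradient: scatter the incoming gradient to the four bilinear
+    // neighbors with atomicAdd, weighted by the interpolation coefficients.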
+ for (int c = 0; c < channels; c++) {
+ float warped_diff_value = gradient[((n * height + y) * width + x) * channels + c];
+ atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_L) * channels + c],
+ warped_diff_value * (1 - alpha) * (1 - beta));
+ atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_R) * channels + c],
+ warped_diff_value * alpha * (1 - beta));
+ atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_L) * channels + c],
+ warped_diff_value * (1 - alpha) * beta);
+ atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_R) * channels + c],
+ warped_diff_value * alpha * beta);
+ }
+
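+    // Flow gradient: the derivative of the bilinear sample w.r.t. the flow's
+    // x (then y) component is built from horizontal (then vertical) image
+    // differences, blended by gamma, the distance to the far cell border.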
+ float gamma = iy2_B - y2;
+ float bot_diff = 0;
+
+ for (int c = 0; c < channels; c++) {
+ float temp = 0;
+ temp += gamma *
+ (image[((n * height + iy2_T) * width + ix2_R) * channels + c] -
+ image[((n * height + iy2_T) * width + ix2_L) * channels + c]);
+ temp += (1 - gamma) *
+ (image[((n * height + iy2_B) * width + ix2_R) * channels + c] -
+ image[((n * height + iy2_B) * width + ix2_L) * channels + c]);
+
+ bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp;
+ }
+ flow_grad[((n * height + y) * width + x) * 2] = bot_diff;
+
+ gamma = ix2_R - x2;
+ bot_diff = 0;
+
+ for (int c = 0; c < channels; c++) {
+ float temp = 0;
+ temp += gamma *
+ (image[((n * height + iy2_B) * width + ix2_L) * channels + c] -
+ image[((n * height + iy2_T) * width + ix2_L) * channels + c]);
+ temp += (1 - gamma) *
+ (image[((n * height + iy2_B) * width + ix2_R) * channels + c] -
+ image[((n * height + iy2_T) * width + ix2_R) * channels + c]);
+
+ bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp;
+ }
+ flow_grad[((n * height + y) * width + x) * 2 + 1] = bot_diff;
+ }
+}
+
+void FlowWarpGrad(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor image,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::ConstTensor gradient,
+ typename TTypes<float, 4>::Tensor image_grad,
+ typename TTypes<float, 4>::Tensor flow_grad) {
+ const int batch_size = image.dimension(0);
+ const int height = image.dimension(1);
+ const int width = image.dimension(2);
+ const int channels = image.dimension(3);
+ const int width_height = width * height;
+
+ int wblocks = ((width - 1) / FW_TILE_X + 1);
+ int cblocks = ((channels - 1) / FW_TILE_C + 1);
+ dim3 warpThreads(FW_TILE_X, 1);
+ dim3 warpBlocks(wblocks, height, batch_size);
+
+ cudaMemset(image_grad.data(), 0, batch_size * height * width * channels * sizeof(float));
+ cudaMemset(flow_grad.data(), 0, batch_size * height * width * 2 * sizeof(float));
+
+ FlowWarpGradKernel << < warpBlocks, warpThreads, 0, device.stream() >> > (
+ image.data(),
+ image_grad.data(),
+ flow.data(),
+ flow_grad.data(),
+ gradient.data(),
+ batch_size,
+ channels,
+ cblocks,
+ width,
+ wblocks,
+ height,
+ width_height);
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc
new file mode 100644
index 0000000..aef9c74
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc
@@ -0,0 +1,23 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+REGISTER_OP("FlowWarp")
+.Input("image: float32")
+.Input("flow: float32")
+.Output("output: float32")
+.SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
+
+REGISTER_OP("FlowWarpGrad")
+.Input("image: float32")
+.Input("flow: float32")
+.Input("gradient: float32")
+.Output("image_grad: float32")
+.Output("flow_grad: float32")
+.SetShapeFn([](shape_inference::InferenceContext *c) {
+ c->set_output(0, c->input(0));
+ c->set_output(1, c->input(1));
+ return Status::OK();
+ });
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc
new file mode 100644
index 0000000..b93dfa6
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc
@@ -0,0 +1,420 @@
+#include "augmentation_base.h"
+
+#include <math.h>
+#include <random>
+
+namespace tensorflow {
+/** TransMat Functions **/
+void AugmentationLayerBase::TransMat::fromCoeff(AugmentationCoeff *coeff,
+ int out_width,
+ int out_height,
+ int src_width,
+ int src_height) {
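+  // Compose the output-to-source mapping: move the origin to the crop center,
+  // rotate, translate, zoom, then move the origin to the source image center.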
+ leftMultiply(1, 0, -0.5 * out_width,
+ 0, 1, -0.5 * out_height);
+
+ if (coeff->angle) {
+ leftMultiply(cos(coeff->angle()), -sin(coeff->angle()), 0,
+ sin(coeff->angle()), cos(coeff->angle()), 0);
+ }
+
+ if (coeff->dx || coeff->dy) {
+ leftMultiply(1, 0, coeff->dx() * out_width,
+ 0, 1, coeff->dy() * out_height);
+ }
+
+ if (coeff->zoom_x || coeff->zoom_y) {
+ leftMultiply(1.0 / coeff->zoom_x(), 0, 0,
+ 0, 1.0 / coeff->zoom_y(), 0);
+ }
+
+ leftMultiply(1, 0, 0.5 * src_width,
+ 0, 1, 0.5 * src_height);
+}
+
+void AugmentationLayerBase::TransMat::fromTensor(const float *tensor_data) {
+ t0 = tensor_data[0];
+ t1 = tensor_data[1];
+ t2 = tensor_data[2];
+ t3 = tensor_data[3];
+ t4 = tensor_data[4];
+ t5 = tensor_data[5];
+}
+
+AugmentationLayerBase::TransMat AugmentationLayerBase::TransMat::inverse() {
+ float a = this->t0, b = this->t1, c = this->t2;
+ float d = this->t3, e = this->t4, f = this->t5;
+
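+  // Closed-form inverse of the 2x3 affine [a b c; d e f]: invert the 2x2
+  // linear part via its determinant and fold the translation through it.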
+ float denom = a * e - b * d;
+
+ TransMat result;
+
+ result.t0 = e / denom;
+ result.t1 = b / -denom;
+ result.t2 = (c * e - b * f) / -denom;
+ result.t3 = d / -denom;
+ result.t4 = a / denom;
+ result.t5 = (c * d - a * f) / denom;
+
+ return result;
+}
+
+void AugmentationLayerBase::TransMat::leftMultiply(float u0,
+ float u1,
+ float u2,
+ float u3,
+ float u4,
+ float u5) {
+ float t0 = this->t0, t1 = this->t1, t2 = this->t2;
+ float t3 = this->t3, t4 = this->t4, t5 = this->t5;
+
+ this->t0 = t0 * u0 + t3 * u1;
+ this->t1 = t1 * u0 + t4 * u1;
+ this->t2 = t2 * u0 + t5 * u1 + u2;
+ this->t3 = t0 * u3 + t3 * u4;
+ this->t4 = t1 * u3 + t4 * u4;
+ this->t5 = t2 * u3 + t5 * u4 + u5;
+}
+
+void AugmentationLayerBase::TransMat::toIdentity() {
+ t0 = 1; t1 = 0; t2 = 0;
+ t3 = 0; t4 = 1; t5 = 0;
+}
+
+/** AugmentationCoeff Functions **/
+void AugmentationCoeff::clear() {
+ // Spatial variables
+ dx.clear();
+ dy.clear();
+ angle.clear();
+ zoom_x.clear();
+ zoom_y.clear();
+
+ // Chromatic variables
+ gamma.clear();
+ brightness.clear();
+ contrast.clear();
+ color1.clear();
+ color2.clear();
+ color3.clear();
+}
+
+void AugmentationCoeff::combine_with(const AugmentationCoeff& coeff) {
+ // Spatial types
+ if (coeff.dx) {
+ dx = dx() * coeff.dx();
+ }
+
+ if (coeff.dy) {
+ dy = dy() * coeff.dy();
+ }
+
+ if (coeff.angle) {
+ angle = angle() * coeff.angle();
+ }
+
+ if (coeff.zoom_x) {
+ zoom_x = zoom_x() * coeff.zoom_x();
+ }
+
+ if (coeff.zoom_y) {
+ zoom_y = zoom_y() * coeff.zoom_y();
+ }
+
+ // Chromatic types
+ if (coeff.gamma) {
+ gamma = gamma() * coeff.gamma();
+ }
+
+ if (coeff.brightness) {
+ brightness = brightness() * coeff.brightness();
+ }
+
+ if (coeff.contrast) {
+ contrast = contrast() * coeff.contrast();
+ }
+
+ if (coeff.color1) {
+ color1 = color1() * coeff.color1();
+ }
+
+ if (coeff.color2) {
+ color2 = color2() * coeff.color2();
+ }
+
+ if (coeff.color3) {
+ color3 = color3() * coeff.color3();
+ }
+}
+
+void AugmentationCoeff::replace_with(const AugmentationCoeff& coeff) {
+ // Spatial types
+ if (coeff.dx) {
+ dx = coeff.dx();
+ }
+
+ if (coeff.dy) {
+ dy = coeff.dy();
+ }
+
+ if (coeff.angle) {
+ angle = coeff.angle();
+ }
+
+ if (coeff.zoom_x) {
+ zoom_x = coeff.zoom_x();
+ }
+
+ if (coeff.zoom_y) {
+ zoom_y = coeff.zoom_y();
+ }
+
+ // Chromatic types
+ if (coeff.gamma) {
+    gamma = coeff.gamma();
+ }
+
+ if (coeff.brightness) {
+ brightness = coeff.brightness();
+ }
+
+ if (coeff.contrast) {
+ contrast = coeff.contrast();
+ }
+
+ if (coeff.color1) {
+ color1 = coeff.color1();
+ }
+
+ if (coeff.color2) {
+ color2 = coeff.color2();
+ }
+
+ if (coeff.color3) {
+ color3 = coeff.color3();
+ }
+}
+
+/** AugmentationLayerBase Functions **/
+float AugmentationLayerBase::rng_generate(const AugmentationParam& param,
+ float discount_coeff,
+ const float default_value) {
+ std::random_device rd; // Will be used to obtain a seed for the random number
+ // engine
+ std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd()
+
+ float spread = param.spread * discount_coeff;
+
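+  // Both modes first draw a Bernoulli gate with probability `prob`; if the
+  // gate fails the default value is returned. Otherwise the coefficient is
+  // drawn uniformly from [mean - spread, mean + spread] or from a normal
+  // distribution with mean `mean` and std `spread` (spread already includes
+  // the discount), and is optionally exponentiated.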
+ if (param.rand_type == "uniform_bernoulli") {
+ float tmp1 = 0.0;
+ bool tmp2 = false;
+
+ if (param.prob > 0.0) {
+ std::bernoulli_distribution bernoulli(param.prob);
+ tmp2 = bernoulli(gen);
+ }
+
+ if (!tmp2) {
+ return default_value;
+ }
+
+ if (param.spread > 0.0) {
+ std::uniform_real_distribution<> uniform(param.mean - spread,
+ param.mean + spread);
+ tmp1 = uniform(gen);
+ } else {
+ tmp1 = param.mean;
+ }
+
+ if (param.should_exp) {
+ tmp1 = exp(tmp1);
+ }
+
+ return tmp1;
+ } else if (param.rand_type == "gaussian_bernoulli") {
+ float tmp1 = 0.0;
+ bool tmp2 = false;
+
+ if (param.prob > 0.0) {
+ std::bernoulli_distribution bernoulli(param.prob);
+ tmp2 = bernoulli(gen);
+ }
+
+ if (!tmp2) {
+ return default_value;
+ }
+
+ if (spread > 0.0) {
+ std::normal_distribution<> normal(param.mean, spread);
+ tmp1 = normal(gen);
+ } else {
+ tmp1 = param.mean;
+ }
+
+ if (param.should_exp) {
+ tmp1 = exp(tmp1);
+ }
+
+ return tmp1;
+ } else {
+ throw "Unknown random type: " + param.rand_type;
+ }
+}
+
+void AugmentationLayerBase::generate_chromatic_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff) {
+ if (aug.gamma) {
+ coeff.gamma = rng_generate(aug.gamma(), discount_coeff, coeff.gamma.get_default());
+ }
+
+ if (aug.brightness) {
+ coeff.brightness =
+ rng_generate(aug.brightness(), discount_coeff, coeff.brightness.get_default());
+ }
+
+ if (aug.contrast) {
+ coeff.contrast = rng_generate(aug.contrast(), discount_coeff, coeff.contrast.get_default());
+ }
+
+ if (aug.color) {
+ coeff.color1 = rng_generate(aug.color(), discount_coeff, coeff.color1.get_default());
+ coeff.color2 = rng_generate(aug.color(), discount_coeff, coeff.color2.get_default());
+ coeff.color3 = rng_generate(aug.color(), discount_coeff, coeff.color3.get_default());
+ }
+}
+
+void AugmentationLayerBase::generate_spatial_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff) {
+ if (aug.translate) {
+ coeff.dx = rng_generate(aug.translate(), discount_coeff, coeff.dx.get_default());
+ coeff.dy = rng_generate(aug.translate(), discount_coeff, coeff.dy.get_default());
+ }
+
+ if (aug.rotate) {
+ coeff.angle = rng_generate(aug.rotate(), discount_coeff, coeff.angle.get_default());
+ }
+
+ if (aug.zoom) {
+ coeff.zoom_x = rng_generate(aug.zoom(), discount_coeff, coeff.zoom_x.get_default());
+ coeff.zoom_y = coeff.zoom_x();
+ }
+
+ if (aug.squeeze) {
+ float squeeze_coeff = rng_generate(aug.squeeze(), discount_coeff, 1.0);
+ coeff.zoom_x = coeff.zoom_x() * squeeze_coeff;
+ coeff.zoom_y = coeff.zoom_y() * squeeze_coeff;
+ }
+}
+
+void AugmentationLayerBase::generate_valid_spatial_coeffs(
+ float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff,
+ int src_width,
+ int src_height,
+ int out_width,
+ int out_height) {
+ int x, y;
+ float x1, y1, x2, y2;
+ int counter = 0;
+ int good_params = 0;
+ AugmentationCoeff incoming_coeff(coeff);
+
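+  // Rejection sampling: draw spatial parameters until all four corners of the
+  // transformed crop land inside the source image, giving up after 50 tries.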
+ while (good_params < 4 && counter < 50) {
+ coeff.clear();
+ AugmentationLayerBase::generate_spatial_coeffs(discount_coeff, aug, coeff);
+ coeff.combine_with(incoming_coeff);
+
+ // Check if all 4 corners of the transformed image fit into the original
+ // image
+ good_params = 0;
+
+ for (x = 0; x < out_width; x += out_width - 1) {
+ for (y = 0; y < out_height; y += out_height - 1) {
+ // move the origin
+ x1 = x - 0.5 * out_width;
+ y1 = y - 0.5 * out_height;
+
+ // rotate
+ x2 = cos(coeff.angle()) * x1 - sin(coeff.angle()) * y1;
+        y2 = sin(coeff.angle()) * x1 + cos(coeff.angle()) * y1;
+
+ // translate
+ x2 = x2 + coeff.dx() * out_width;
+ y2 = y2 + coeff.dy() * out_height;
+
+ // zoom
+ x2 = x2 / coeff.zoom_x();
+ y2 = y2 / coeff.zoom_y();
+
+ // move the origin back
+ x2 = x2 + 0.5 * src_width;
+ y2 = y2 + 0.5 * src_height;
+
+ if (!((floor(x2) < 0) || (floor(x2) > src_width - 2.0) ||
+ (floor(y2) < 0) || (floor(y2) > src_height - 2.0))) {
+ good_params++;
+ }
+ }
+ }
+ counter++;
+ }
+
+ if (counter >= 50) {
+ printf("Warning: No suitable spatial transformation after %d attempts.\n", counter);
+ coeff.clear();
+ coeff.replace_with(incoming_coeff);
+ }
+}
+
+void AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(
+ const std::vector<AugmentationCoeff>& coeff_arr,
+ typename TTypes<float, 2>::Tensor& out)
+{
+ float *out_ptr = out.data();
+ int counter = 0;
+
+ for (AugmentationCoeff coeff : coeff_arr) {
+ out_ptr[counter + 0] = coeff.gamma();
+ out_ptr[counter + 1] = coeff.brightness();
+ out_ptr[counter + 2] = coeff.contrast();
+ out_ptr[counter + 3] = coeff.color1();
+ out_ptr[counter + 4] = coeff.color2();
+ out_ptr[counter + 5] = coeff.color3();
+ counter += 6;
+ }
+}
+
+void AugmentationLayerBase::copy_spatial_coeffs_to_tensor(
+ const std::vector<AugmentationCoeff>& coeff_arr,
+ const int out_width,
+ const int out_height,
+ const int src_width,
+ const int src_height,
+ typename TTypes<float, 2>::Tensor& out,
+ const bool invert)
+{
+ float *out_ptr = out.data();
+ int counter = 0;
+ TransMat t;
+
+ for (AugmentationCoeff coeff : coeff_arr) {
+ t.toIdentity();
+ t.fromCoeff(&coeff, out_width, out_height, src_width, src_height);
+
+ if (invert) {
+ t = t.inverse();
+ }
+
+ out_ptr[counter + 0] = t.t0;
+ out_ptr[counter + 1] = t.t1;
+ out_ptr[counter + 2] = t.t2;
+ out_ptr[counter + 3] = t.t3;
+ out_ptr[counter + 4] = t.t4;
+ out_ptr[counter + 5] = t.t5;
+ counter += 6;
+ }
+}
+}
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h
new file mode 100644
index 0000000..d2aba2c
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h
@@ -0,0 +1,228 @@
+#ifndef AUGMENTATION_LAYER_BASE_H_
+#define AUGMENTATION_LAYER_BASE_H_
+
+#include "tensorflow/core/framework/tensor_types.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace tensorflow {
+template<typename T>
+class OptionalType {
+ public:
+ OptionalType(const T default_value) : default_value(default_value), has_value(false) {}
+
+ operator bool() const {
+ return has_value;
+ }
+
+ OptionalType& operator=(T val) {
+ has_value = true;
+ value = val;
+ return *this;
+ }
+
+ const T operator()() const {
+ return has_value ? value : default_value;
+ }
+
+ void clear() {
+ has_value = false;
+ }
+
+ const T get_default() {
+ return default_value;
+ }
+
+ private:
+ T value;
+ bool has_value;
+ const T default_value;
+};
+
+class AugmentationCoeff {
+ public:
+ // Spatial Types
+ OptionalType<float>dx;
+ OptionalType<float>dy;
+ OptionalType<float>angle;
+ OptionalType<float>zoom_x;
+ OptionalType<float>zoom_y;
+
+ // Chromatic Types
+ OptionalType<float>gamma;
+ OptionalType<float>brightness;
+ OptionalType<float>contrast;
+ OptionalType<float>color1;
+ OptionalType<float>color2;
+ OptionalType<float>color3;
+
+ AugmentationCoeff() : dx(0.0), dy(0.0), angle(0.0), zoom_x(1.0), zoom_y(1.0), gamma(1.0),
+ brightness(0.0), contrast(1.0), color1(1.0), color2(1.0), color3(1.0) {}
+
+ AugmentationCoeff(const AugmentationCoeff& coeff) : AugmentationCoeff() {
+ replace_with(coeff);
+ }
+
+ void clear();
+
+ void combine_with(const AugmentationCoeff& coeff);
+
+ void replace_with(const AugmentationCoeff& coeff);
+};
+
+typedef struct AugmentationParam {
+ std::string rand_type;
+ bool should_exp;
+ float mean;
+ float spread;
+ float prob;
+} AugmentationParam;
+
+class AugmentationParams {
+ public:
+ int crop_height;
+ int crop_width;
+
+ // Spatial options
+ OptionalType<struct AugmentationParam>translate;
+ OptionalType<struct AugmentationParam>rotate;
+ OptionalType<struct AugmentationParam>zoom;
+ OptionalType<struct AugmentationParam>squeeze;
+
+ // Chromatic options
+ OptionalType<struct AugmentationParam>gamma;
+ OptionalType<struct AugmentationParam>brightness;
+ OptionalType<struct AugmentationParam>contrast;
+ OptionalType<struct AugmentationParam>color;
+
+ inline AugmentationParams(int crop_height,
+ int crop_width,
+ std::vector<std::string>params_name,
+ std::vector<std::string>params_rand_type,
+ std::vector<bool> params_exp,
+ std::vector<float> params_mean,
+ std::vector<float> params_spread,
+ std::vector<float> params_prob) :
+ crop_height(crop_height),
+ crop_width(crop_width),
+ translate(AugmentationParam()),
+ rotate(AugmentationParam()),
+ zoom(AugmentationParam()),
+ squeeze(AugmentationParam()),
+ gamma(AugmentationParam()),
+ brightness(AugmentationParam()),
+ contrast(AugmentationParam()),
+ color(AugmentationParam()) {
+ for (int i = 0; i < params_name.size(); i++) {
+ const std::string name = params_name[i];
+ const std::string rand_type = params_rand_type[i];
+ const bool should_exp = params_exp[i];
+ const float mean = params_mean[i];
+ const float spread = params_spread[i];
+ const float prob = params_prob[i];
+
+ struct AugmentationParam param = { rand_type, should_exp, mean, spread, prob };
+
+ if (name == "translate") {
+ this->translate = param;
+ } else if (name == "rotate") {
+ this->rotate = param;
+ } else if (name == "zoom") {
+ this->zoom = param;
+ } else if (name == "squeeze") {
+ this->squeeze = param;
+ } else if (name == "noise") {
+ // NoOp: We handle noise on the Python side
+ } else if (name == "gamma") {
+ this->gamma = param;
+ } else if (name == "brightness") {
+ this->brightness = param;
+ } else if (name == "contrast") {
+ this->contrast = param;
+ } else if (name == "color") {
+ this->color = param;
+ } else {
+ std::cout << "Ignoring unknown augmentation parameter: " << name << std::endl;
+ }
+ }
+ }
+
+ bool should_do_spatial_transform() {
+ return this->translate || this->rotate || this->zoom || this->squeeze;
+ }
+
+ bool should_do_chromatic_transform() {
+ return this->gamma || this->brightness || this->contrast || this->color;
+ }
+};
+
+class AugmentationLayerBase {
+ public:
+ class TransMat {
+ /**
+     * 2x3 affine transformation matrix used for spatial augmentation.
+     * Coefficient layout:
+     * | t0 t1 t2 |
+     * | t3 t4 t5 |
+ */
+
+ public:
+ float t0, t1, t2;
+ float t3, t4, t5;
+
+
+ void fromCoeff(AugmentationCoeff *coeff,
+ int out_width,
+ int out_height,
+ int src_width,
+ int src_height);
+
+ void fromTensor(const float *tensor_data);
+
+ TransMat inverse();
+
+ void leftMultiply(float u0,
+ float u1,
+ float u2,
+ float u3,
+ float u4,
+ float u5);
+
+ void toIdentity();
+ };
+
+ // TODO: Class ChromaticCoeffs
+
+ static float rng_generate(const AugmentationParam& param,
+ float discount_coeff,
+ const float default_value);
+
+ static void clear_spatial_coeffs(AugmentationCoeff& coeff);
+ static void generate_chromatic_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff);
+ static void generate_spatial_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff);
+ static void generate_valid_spatial_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff,
+ int src_width,
+ int src_height,
+ int out_width,
+ int out_height);
+
+ static void copy_chromatic_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr,
+ typename TTypes<float, 2>::Tensor& out);
+ static void copy_spatial_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr,
+ const int out_width,
+ const int out_height,
+ const int src_width,
+ const int src_height,
+ typename TTypes<float, 2>::Tensor& out,
+ const bool invert = false);
+};
+} // namespace tensorflow
+
+#endif // AUGMENTATION_LAYER_BASE_H_
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc
new file mode 100644
index 0000000..77b8c83
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc
@@ -0,0 +1,461 @@
+#define EIGEN_USE_THREADS
+
+#include <algorithm>
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include "augmentation_base.h"
+#include "data_augmentation.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+inline float clamp(float f, float a, float b) {
+ return fmaxf(a, fminf(f, b));
+}
+
+template<>
+void Augment(OpKernelContext *context,
+ const CPUDevice& d,
+ const int batch_size,
+ const int channels,
+ const int src_width,
+ const int src_height,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats,
+ float *chromatic_coeffs) {
+ const int64 channel_count = batch_size * out_height * out_width;
+ const int kCostPerChannel = 10;
+ const DeviceBase::CpuWorkerThreads& worker_threads =
+ *context->device()->tensorflow_cpu_worker_threads();
+
+ Shard(worker_threads.num_threads,
+ worker_threads.workers,
+ channel_count,
+ kCostPerChannel,
+ [batch_size, channels, src_width,
+ src_height, src_count, out_width, out_height, src_data,
+ out_data, transMats, chromatic_coeffs](
+ int64 start_channel, int64 end_channel) {
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ for (int index = start_channel; index < end_channel; index++) {
+ int x = index % out_width;
+ int y = (index / out_width) % out_height;
+ int n = index / out_width / out_height;
+
+ const float *transMat = transMats + n * 6;
+
+ float gamma, brightness, contrast;
+
+ if (chromatic_coeffs) {
+ gamma = chromatic_coeffs[n * 6 + 0];
+ brightness = chromatic_coeffs[n * 6 + 1];
+ contrast = chromatic_coeffs[n * 6 + 2];
+ }
+
+ float xpos = x * transMat[0] + y * transMat[1] + transMat[2];
+ float ypos = x * transMat[3] + y * transMat[4] + transMat[5];
+
+ xpos = clamp(xpos, 0.0f, (float)(src_width) - 1.05f);
+ ypos = clamp(ypos, 0.0f, (float)(src_height) - 1.05f);
+
+ float tlx = floor(xpos);
+ float tly = floor(ypos);
+
+ float xdist = xpos - tlx;
+ float ydist = ypos - tly;
+
+ int srcTLIdxOffset = ((n * src_height + (int)tly) * src_width + (int)tlx) * channels;
+
+ // ((n * src_height + tly) * src_width + (tlx + 1)) * channels
+ int srcTRIdxOffset = srcTLIdxOffset + channels;
+
+ // ((n * src_height + (tly + 1)) * src_width + tlx) * channels
+ int srcBLIdxOffset = srcTLIdxOffset + channels * src_width;
+
+ // ((n * src_height + (tly + 1)) * src_width + (tlx + 1)) * channels
+ int srcBRIdxOffset = srcTLIdxOffset + channels + channels * src_width;
+
+ // Variables for chromatic transform
+ int data_index[3];
+ float rgb[3];
+ float mean_in = 0;
+ float mean_out = 0;
+
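+            // Each output channel is bilinearly sampled from the source; when
+            // chromatic coefficients are present the per-channel color factors
+            // are applied here, and mean_in/mean_out are kept so the overall
+            // brightness can be compensated afterwards.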
+ for (int c = 0; c < channels; c++) {
+ // Bilinear interpolation
+ int srcTLIdx = srcTLIdxOffset + c;
+ int srcTRIdx = std::min(srcTRIdxOffset + c, src_count);
+ int srcBLIdx = std::min(srcBLIdxOffset + c, src_count);
+ int srcBRIdx = std::min(srcBRIdxOffset + c, src_count);
+
+ float dest = (1 - xdist) * (1 - ydist) * src_data[srcTLIdx]
+ + (xdist) * (ydist) * src_data[srcBRIdx]
+ + (1 - xdist) * (ydist) * src_data[srcBLIdx]
+ + (xdist) * (1 - ydist) * src_data[srcTRIdx];
+
+ if (chromatic_coeffs) {
+ // Gather data for chromatic transform
+ data_index[c] = index * channels + c;
+ rgb[c] = dest;
+ mean_in += rgb[c];
+
+ // Note: coeff[3] == color1, coeff[4] == color2, ...
+ rgb[c] *= chromatic_coeffs[n * 6 + (3 + c)];
+
+ mean_out += rgb[c];
+ } else {
+ out_data[index * channels + c] = dest;
+ }
+ }
+
+ float brightness_coeff = mean_in / (mean_out + 0.01f);
+
+ if (chromatic_coeffs) {
+ // Chromatic transformation
+ for (int c = 0; c < channels; c++) {
+ // compensate brightness
+ rgb[c] = clamp(rgb[c] * brightness_coeff, 0.0f, 1.0f);
+
+ // gamma change
+ rgb[c] = pow(rgb[c], gamma);
+
+ // brightness change
+ rgb[c] = rgb[c] + brightness;
+
+ // contrast change
+ rgb[c] = 0.5f + (rgb[c] - 0.5f) * contrast;
+
+ out_data[data_index[c]] = clamp(rgb[c], 0.0f, 1.0f);
+ }
+ }
+ }
+ });
+}
+
+template<typename Device>
+class DataAugmentation : public OpKernel {
+ public:
+ explicit DataAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the crop [height, width] tensor and verify its dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
+ OP_REQUIRES(ctx, crop_.size() == 2,
+ errors::InvalidArgument("crop must be 2 dimensions"));
+
+ // TODO: Verify params are all the same length
+
+ // Get the tensors for params_a and verify their dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_name", &params_a_name_));
+ OP_REQUIRES_OK(ctx,
+ ctx->GetAttr("params_a_rand_type", &params_a_rand_type_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_exp", &params_a_exp_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_mean", &params_a_mean_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_spread", &params_a_spread_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_prob", &params_a_prob_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_coeff_schedule", &params_a_coeff_schedule_));
+
+ // Get the tensors for params_b and verify their dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_name", &params_b_name_));
+ OP_REQUIRES_OK(ctx,
+ ctx->GetAttr("params_b_rand_type", &params_b_rand_type_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_exp", &params_b_exp_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_mean", &params_b_mean_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_spread", &params_b_spread_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_prob", &params_b_prob_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_coeff_schedule", &params_b_coeff_schedule_));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images
+ const Tensor& input_a_t = ctx->input(0);
+ const Tensor& input_b_t = ctx->input(1);
+
+ // Get the global step value
+ const Tensor& global_step_t = ctx->input(2);
+ auto global_step_eigen = global_step_t.tensor<int64, 0>();
+ const int64 global_step = global_step_eigen.data()[0];
+
+ // Dimension constants
+ const int batch_size = input_a_t.dim_size(0);
+ const int src_height = input_a_t.dim_size(1);
+ const int src_width = input_a_t.dim_size(2);
+ const int channels = input_a_t.dim_size(3);
+ const int src_count = batch_size * src_height * src_width * channels;
+ const int out_height = crop_[0];
+ const int out_width = crop_[1];
+ const int out_count = batch_size * out_height * out_width * channels;
+
+ // All tensors for this op
+ Tensor chromatic_coeffs_a_t;
+ Tensor chromatic_coeffs_b_t;
+
+ // Allocate the memory for the output images
+ Tensor *output_a_t;
+ Tensor *output_b_t;
+
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(0, TensorShape({ batch_size, crop_[0], crop_[1],
+ channels }), &output_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(1, TensorShape({ batch_size, crop_[0], crop_[1],
+ channels }), &output_b_t));
+
+ // Allocate the memory for the output spatial transforms
+ Tensor *spat_transform_a_t;
+ Tensor *spat_transform_b_t;
+
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(2, TensorShape({ batch_size, 6 }),
+ &spat_transform_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(3, TensorShape({ batch_size, 6 }),
+ &spat_transform_b_t));
+
+ // Compute discount for coefficients if using a schedule
+ float discount_coeff_a = 1.0;
+ float discount_coeff_b = 1.0;
+
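+    // A coeff schedule is [half_life, initial_coeff, final_coeff]: the discount
+    // ramps from initial_coeff toward final_coeff following
+    // 2 / (1 + exp(-1.0986 * step / half_life)) - 1, reaching half of the ramp
+    // after `half_life` steps.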
+ if (params_a_coeff_schedule_.size() == 3) {
+ float half_life = params_a_coeff_schedule_[0];
+ float initial_coeff = params_a_coeff_schedule_[1];
+ float final_coeff = params_a_coeff_schedule_[2];
+ discount_coeff_a = initial_coeff + (final_coeff - initial_coeff) *
+ (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0);
+ }
+
+ if (params_b_coeff_schedule_.size() == 3) {
+ if (params_a_coeff_schedule_.size() == 3) {
+ discount_coeff_b = discount_coeff_a;
+ } else {
+ float half_life = params_b_coeff_schedule_[0];
+ float initial_coeff = params_b_coeff_schedule_[1];
+ float final_coeff = params_b_coeff_schedule_[2];
+ discount_coeff_b = initial_coeff + (final_coeff - initial_coeff) *
+ (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0);
+ }
+ }
+
+ /*** BEGIN AUGMENTATION TO IMAGE A ***/
+ auto input_a = input_a_t.tensor<float, 4>();
+ auto output_a = output_a_t->tensor<float, 4>();
+
+ // Load augmentation parameters for image A
+ AugmentationParams aug_a = AugmentationParams(out_height, out_width,
+ params_a_name_,
+ params_a_rand_type_,
+ params_a_exp_,
+ params_a_mean_,
+ params_a_spread_,
+ params_a_prob_);
+
+ std::vector<AugmentationCoeff> coeffs_a;
+
+
+ bool gen_spatial_transform = aug_a.should_do_spatial_transform();
+ bool gen_chromatic_transform = aug_a.should_do_chromatic_transform();
+
+ for (int n = 0; n < batch_size; n++) {
+ AugmentationCoeff coeff;
+
+ if (gen_spatial_transform) {
+ AugmentationLayerBase::generate_valid_spatial_coeffs(discount_coeff_a, aug_a, coeff,
+ src_width, src_height,
+ out_width, out_height);
+ }
+
+ if (gen_chromatic_transform) {
+ AugmentationLayerBase::generate_chromatic_coeffs(discount_coeff_a, aug_a, coeff);
+ }
+
+ coeffs_a.push_back(coeff);
+ }
+
+ // Copy spatial coefficients A to the output Tensor on the CPU
+ // (output for FlowAugmentation)
+ auto spat_transform_a = spat_transform_a_t->tensor<float, 2>();
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a,
+ out_width, out_height,
+ src_width, src_height,
+ spat_transform_a);
+
+ float *chromatic_coeffs_a_data = NULL;
+
+ if (gen_chromatic_transform) {
+ // Allocate a temporary tensor to hold the chromatic coefficients
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, 6 }),
+ &chromatic_coeffs_a_t));
+
+ // Copy the chromatic coefficients A to a temporary Tensor on the CPU
+ auto chromatic_coeffs_a = chromatic_coeffs_a_t.tensor<float, 2>();
+ AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(coeffs_a, chromatic_coeffs_a);
+ chromatic_coeffs_a_data = chromatic_coeffs_a.data();
+ }
+
+ // Perform augmentation either on CPU or GPU
+ Augment<Device>(
+ ctx,
+ ctx->eigen_device<Device>(),
+ batch_size,
+ channels,
+ src_width,
+ src_height,
+ src_count,
+ out_width,
+ out_height,
+ input_a.data(),
+ output_a.data(),
+ spat_transform_a.data(),
+ chromatic_coeffs_a_data);
+
+ /*** END AUGMENTATION TO IMAGE A ***/
+
+ /*** BEGIN GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+ AugmentationParams aug_b = AugmentationParams(out_height, out_width,
+ params_b_name_,
+ params_b_rand_type_,
+ params_b_exp_,
+ params_b_mean_,
+ params_b_spread_,
+ params_b_prob_);
+
+ std::vector<AugmentationCoeff> coeffs_b;
+
+ bool gen_spatial_transform_b = aug_b.should_do_spatial_transform();
+ bool gen_chromatic_transform_b = aug_b.should_do_chromatic_transform();
+
+ for (int n = 0; n < batch_size; n++) {
+ AugmentationCoeff coeff(coeffs_a[n]);
+
+ // If we did a spatial transform on image A, we need to do the same one
+ // (+ possibly more) on image B
+ if (gen_spatial_transform_b) {
+ AugmentationLayerBase::generate_valid_spatial_coeffs(discount_coeff_b, aug_b, coeff,
+ src_width, src_height,
+ out_width, out_height);
+ }
+
+ if (gen_chromatic_transform_b) {
+ AugmentationLayerBase::generate_chromatic_coeffs(discount_coeff_b, aug_b, coeff);
+ }
+
+ coeffs_b.push_back(coeff);
+ }
+
+ /*** END GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+
+ /*** BEGIN AUGMENTATION TO IMAGE B ***/
+ auto input_b = input_b_t.tensor<float, 4>();
+ auto output_b = output_b_t->tensor<float, 4>();
+
+ // Copy spatial coefficients B to the output Tensor on the CPU
+ auto spat_transform_b = spat_transform_b_t->tensor<float, 2>();
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+ out_width, out_height,
+ src_width, src_height,
+ spat_transform_b);
+
+ float *chromatic_coeffs_b_data = NULL;
+
+ if (gen_chromatic_transform || gen_chromatic_transform_b) {
+ // Allocate a temporary tensor to hold the chromatic coefficients
+ tensorflow::AllocatorAttributes pinned_allocator;
+ pinned_allocator.set_on_host(true);
+ pinned_allocator.set_gpu_compatible(true);
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, 6 }),
+ &chromatic_coeffs_b_t, pinned_allocator));
+
+      // Copy the chromatic coefficients B to a temporary Tensor on the CPU
+ auto chromatic_coeffs_b = chromatic_coeffs_b_t.tensor<float, 2>();
+ AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(coeffs_b, chromatic_coeffs_b);
+ chromatic_coeffs_b_data = chromatic_coeffs_b.data();
+ }
+
+ // Perform augmentation either on CPU or GPU
+ Augment<Device>(
+ ctx,
+ ctx->eigen_device<Device>(),
+ batch_size,
+ channels,
+ src_width,
+ src_height,
+ src_count,
+ out_width,
+ out_height,
+ input_b.data(),
+ output_b.data(),
+ spat_transform_b.data(),
+ chromatic_coeffs_b_data);
+
+ // FlowAugmentation needs the inverse
+ // TODO: To avoid rewriting, can we invert when we read on the
+ // FlowAugmentation side?
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+ out_width, out_height,
+ src_width, src_height,
+ spat_transform_b,
+ true);
+
+ /*** END AUGMENTATION TO IMAGE B ***/
+ }
+
+ private:
+ std::vector<int32>crop_;
+
+ // Params A
+ std::vector<string>params_a_name_;
+ std::vector<string>params_a_rand_type_;
+ std::vector<bool>params_a_exp_;
+ std::vector<float>params_a_mean_;
+ std::vector<float>params_a_spread_;
+ std::vector<float>params_a_prob_;
+ std::vector<float>params_a_coeff_schedule_;
+
+ // Params B
+ std::vector<string>params_b_name_;
+ std::vector<string>params_b_rand_type_;
+ std::vector<bool>params_b_exp_;
+ std::vector<float>params_b_mean_;
+ std::vector<float>params_b_spread_;
+ std::vector<float>params_b_prob_;
+ std::vector<float>params_b_coeff_schedule_;
+};
+
+
+REGISTER_KERNEL_BUILDER(Name("DataAugmentation")
+ .Device(DEVICE_CPU)
+ .HostMemory("global_step")
+ .HostMemory("transforms_from_a")
+ .HostMemory("transforms_from_b"),
+ DataAugmentation<CPUDevice>)
+
+#if GOOGLE_CUDA
+
+REGISTER_KERNEL_BUILDER(Name("DataAugmentation")
+ .Device(DEVICE_GPU)
+ .HostMemory("global_step")
+ .HostMemory("transforms_from_a")
+ .HostMemory("transforms_from_b"),
+ DataAugmentation<GPUDevice>)
+#endif // GOOGLE_CUDA
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc
new file mode 100644
index 0000000..7a2101d
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc
@@ -0,0 +1,348 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "augmentation_base.h"
+#include "data_augmentation.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+inline __device__ __host__ float clamp(float f, float a, float b) {
+ return fmaxf(a, fminf(f, b));
+}
+
+__global__ void SpatialAugmentation(
+ const int32 nthreads,
+ const int src_width,
+ const int src_height,
+ const int channels,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ int c = index % channels;
+ int x = (index / channels) % out_width;
+ int y = (index / channels / out_width) % out_height;
+ int n = index / channels / out_width / out_height;
+
+ const float *transMat = transMats + n * 6;
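+    // The per-image 2x3 affine matrix maps the output pixel (x, y) back to a
+    // source position (xpos, ypos), which is then sampled bilinearly.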
+ float xpos = x * transMat[0] + y * transMat[1] + transMat[2];
+ float ypos = x * transMat[3] + y * transMat[4] + transMat[5];
+
+ xpos = clamp(xpos, 0.0f, (float)(src_width) - 1.05f);
+ ypos = clamp(ypos, 0.0f, (float)(src_height) - 1.05f);
+
+ float tlx = floor(xpos);
+ float tly = floor(ypos);
+
+ // Bilinear interpolation
+ int srcTLIdx = ((n * src_height + tly) * src_width + tlx) * channels + c;
+ int srcTRIdx = min((int)(((n * src_height + tly) * src_width + (tlx + 1)) * channels + c),
+ src_count);
+ int srcBLIdx = min((int)(((n * src_height + (tly + 1)) * src_width + tlx) * channels + c),
+ src_count);
+ int srcBRIdx = min((int)(((n * src_height + (tly + 1)) * src_width + (tlx + 1)) * channels + c),
+ src_count);
+
+ float xdist = xpos - tlx;
+ float ydist = ypos - tly;
+
+ float dest = (1 - xdist) * (1 - ydist) * src_data[srcTLIdx]
+ + (xdist) * (ydist) * src_data[srcBRIdx]
+ + (1 - xdist) * (ydist) * src_data[srcBLIdx]
+ + (xdist) * (1 - ydist) * src_data[srcTRIdx];
+
+ out_data[index] = dest;
+ }
+}
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template<>
+void Augment(OpKernelContext *context,
+ const GPUDevice& d,
+ const int batch_size,
+ const int channels,
+ const int src_width,
+ const int src_height,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats,
+ float *chromatic_coeffs) {
+ const int out_count = batch_size * out_height * out_width * channels;
+ CudaLaunchConfig config = GetCudaLaunchConfig(out_count, d);
+
+  if (chromatic_coeffs != NULL) {
+    printf("Chromatic transform not yet implemented on GPU; ignoring chromatic coefficients.\n");
+  }
+
+ SpatialAugmentation << < config.block_count, config.thread_per_block, 0, d.stream() >> > (
+ config.virtual_thread_count, src_width, src_height, channels, src_count,
+ out_width, out_height,
+ src_data, out_data, transMats);
+}
+
+//
+// template<typename Device>
+// class DataAugmentation : public OpKernel {
+// public:
+// explicit DataAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
+// // Get the crop [height, width] tensor and verify its dimensions
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
+// OP_REQUIRES(ctx, crop_.size() == 2,
+// errors::InvalidArgument("crop must be 2 dimensions"));
+//
+// // TODO: Verify params are all the same length
+//
+// // Get the tensors for params_a and verify their dimensions
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_name", &params_a_name_));
+// OP_REQUIRES_OK(ctx,
+// ctx->GetAttr("params_a_rand_type",
+// &params_a_rand_type_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_exp", &params_a_exp_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_mean", &params_a_mean_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_spread",
+// &params_a_spread_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_prob", &params_a_prob_));
+//
+// // Get the tensors for params_b and verify their dimensions
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_name", &params_b_name_));
+// OP_REQUIRES_OK(ctx,
+// ctx->GetAttr("params_b_rand_type",
+// &params_b_rand_type_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_exp", &params_b_exp_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_mean", &params_b_mean_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_spread",
+// &params_b_spread_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_prob", &params_b_prob_));
+// }
+//
+// void Compute(OpKernelContext *ctx) override {
+// const GPUDevice& device = ctx->eigen_gpu_device();
+//
+// // Get the input images
+// const Tensor& input_a_t = ctx->input(0);
+// const Tensor& input_b_t = ctx->input(1);
+//
+// // Dimension constants
+// const int batch_size = input_a_t.dim_size(0);
+// const int src_height = input_a_t.dim_size(1);
+// const int src_width = input_a_t.dim_size(2);
+// const int channels = input_a_t.dim_size(3);
+// const int src_count = batch_size * src_height * src_width * channels;
+// const int out_height = crop_[0];
+// const int out_width = crop_[1];
+// const int out_count = batch_size * out_height * out_width * channels;
+//
+// // Allocate the memory for the output images
+// Tensor *output_a_t;
+// Tensor *output_b_t;
+//
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(0, TensorShape({ batch_size,
+// crop_[0], crop_[1],
+// channels }),
+// &output_a_t));
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(1, TensorShape({ batch_size,
+// crop_[0], crop_[1],
+// channels }),
+// &output_b_t));
+//
+// // Allocate the memory for the output spatial transforms
+// Tensor *spat_transform_a_t;
+// Tensor *spat_transform_b_t;
+//
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(2, TensorShape({ batch_size, 6 }),
+// &spat_transform_a_t));
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(3, TensorShape({ batch_size, 6 }),
+// &spat_transform_b_t));
+//
+//     // Allocate temporary pinned memory for the spatial transforms to be
+//     // used on the GPU
+// tensorflow::AllocatorAttributes pinned_allocator;
+// pinned_allocator.set_on_host(true);
+// pinned_allocator.set_gpu_compatible(true);
+//
+// Tensor spat_transform_a_pinned_t;
+// Tensor spat_transform_b_pinned_t;
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_temp(DataTypeToEnum<float>::value,
+// TensorShape({ batch_size, 6 }),
+// &spat_transform_a_pinned_t,
+// pinned_allocator));
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_temp(DataTypeToEnum<float>::value,
+// TensorShape({ batch_size, 6 }),
+// &spat_transform_b_pinned_t,
+// pinned_allocator));
+//     auto spat_transform_a_pinned = spat_transform_a_pinned_t.tensor<float, 2>();
+//     auto spat_transform_b_pinned = spat_transform_b_pinned_t.tensor<float, 2>();
+//
+// /*** BEGIN AUGMENTATION TO IMAGE A ***/
+// auto input_a = input_a_t.tensor<float, 4>();
+// auto output_a = output_a_t->tensor<float, 4>();
+//
+// // Load augmentation parameters for image A
+// AugmentationParams aug_a = AugmentationParams(out_height, out_width,
+// params_a_name_,
+// params_a_rand_type_,
+// params_a_exp_,
+// params_a_mean_,
+// params_a_spread_,
+// params_a_prob_);
+//
+// std::vector<AugmentationCoeff> coeffs_a;
+//
+// bool gen_spatial_transform = aug_a.should_do_spatial_transform();
+//
+// for (int n = 0; n < batch_size; n++) {
+// AugmentationCoeff coeff;
+//
+// if (gen_spatial_transform) {
+// AugmentationLayerBase::generate_valid_spatial_coeffs(aug_a, coeff,
+// src_width,
+// src_height,
+// out_width,
+// out_height);
+// }
+//
+// coeffs_a.push_back(coeff);
+// }
+//
+//     // Copy spatial coefficients A to the output Tensor on the CPU
+//     // (output for FlowAugmentation)
+// auto spat_transform_a = spat_transform_a_t->tensor<float, 2>();
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+// spat_transform_a);
+//
+// // ...as well as a Tensor going to the GPU
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+//                                                           spat_transform_a_pinned);
+//
+// CudaLaunchConfig config = GetCudaLaunchConfig(out_count, device);
+//     SpatialAugmentation << < config.block_count, config.thread_per_block, 0,
+//       device.stream() >> > (
+//       config.virtual_thread_count, src_width, src_height, channels, src_count,
+//       out_width, out_height,
+//       input_a.data(), output_a.data(), spat_transform_a_pinned.data());
+//
+// /*** END AUGMENTATION TO IMAGE A ***/
+//
+// /*** BEGIN GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+// AugmentationParams aug_b = AugmentationParams(out_height, out_width,
+// params_b_name_,
+// params_b_rand_type_,
+// params_b_exp_,
+// params_b_mean_,
+// params_b_spread_,
+// params_b_prob_);
+//
+// std::vector<AugmentationCoeff> coeffs_b;
+//
+// gen_spatial_transform = aug_b.should_do_spatial_transform();
+//
+// for (int n = 0; n < batch_size; n++) {
+// AugmentationCoeff coeff;
+//
+// if (gen_spatial_transform) {
+// AugmentationLayerBase::generate_valid_spatial_coeffs(aug_b, coeff,
+// src_width,
+// src_height,
+// out_width,
+// out_height);
+// }
+//
+// coeffs_b.push_back(coeff);
+// }
+//
+// /*** END GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+//
+// /*** BEGIN AUGMENTATION TO IMAGE B ***/
+// auto input_b = input_b_t.tensor<float, 4>();
+// auto output_b = output_b_t->tensor<float, 4>();
+//
+// // Copy spatial coefficients B to the output Tensor on the CPU
+// auto spat_transform_b = spat_transform_b_t->tensor<float, 2>();
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+// spat_transform_b,
+// true);
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+//                                                           spat_transform_b_pinned);
+//
+//     SpatialAugmentation << < config.block_count, config.thread_per_block, 0,
+//       device.stream() >> > (
+//       config.virtual_thread_count, src_width, src_height, channels, src_count,
+//       out_width, out_height,
+//       input_b.data(), output_b.data(), spat_transform_b_pinned.data());
+//
+// /*** END AUGMENTATION TO IMAGE B ***/
+// }
+//
+// private:
+// std::vector<int32>crop_;
+//
+// // Params A
+// std::vector<string>params_a_name_;
+// std::vector<string>params_a_rand_type_;
+// std::vector<bool>params_a_exp_;
+// std::vector<float>params_a_mean_;
+// std::vector<float>params_a_spread_;
+// std::vector<float>params_a_prob_;
+//
+// // Params B
+// std::vector<string>params_b_name_;
+// std::vector<string>params_b_rand_type_;
+// std::vector<bool>params_b_exp_;
+// std::vector<float>params_b_mean_;
+// std::vector<float>params_b_spread_;
+// std::vector<float>params_b_prob_;
+// };
+} // namespace tensorflow
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h
new file mode 100644
index 0000000..545b8a0
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h
@@ -0,0 +1,22 @@
+#ifndef FLOWNET_DATA_AUGMENTATION_H_
+#define FLOWNET_DATA_AUGMENTATION_H_
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+template<class Device>
+void Augment(OpKernelContext *context,
+ const Device & d,
+ const int batch_size,
+ const int channels,
+ const int src_width,
+ const int src_height,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats,
+ float *chromatic_coeffs);
+} // namespace tensorflow
+#endif // FLOWNET_DATA_AUGMENTATION_H_
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc
new file mode 100644
index 0000000..b5cc11f
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc
@@ -0,0 +1,129 @@
+#define EIGEN_USE_THREADS
+
+#include "flow_augmentation.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+inline int clamp(int f, int a, int b) {
+ return std::max(a, std::min(f, b));
+}
+
+template<>
+void FillFlowAugmentation(const CPUDevice& device,
+ typename TTypes<float, 4>::Tensor output,
+ typename TTypes<float, 4>::ConstTensor flows,
+ typename TTypes<float, 2>::ConstTensor transforms_from_a,
+ typename TTypes<float, 2>::ConstTensor transforms_from_b) {
+ const int batch_size = output.dimension(0);
+ const int out_height = output.dimension(1);
+ const int out_width = output.dimension(2);
+ const int src_height = flows.dimension(1);
+ const int src_width = flows.dimension(2);
+ const int src_total_count = flows.dimension(0) * flows.dimension(1) *
+ flows.dimension(2) * flows.dimension(3);
+ float *output_ptr = output.data();
+ const float *flow_ptr = flows.data();
+
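+  // For each output pixel (x, y): map it into the source frame with the
+  // transform from image A, read the flow there, move by that flow, map the
+  // result back with the (inverse) transform from image B, and store the
+  // difference to (x, y) as the augmented flow.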
+ for (int n = 0; n < batch_size; n++) {
+ const float *transMatA = transforms_from_a.data() + n * 6;
+ const float *transMatB = transforms_from_b.data() + n * 6;
+
+ for (int y = 0; y < out_height; y++) {
+ int outputIdxOffset = (n * out_height + y) * out_width;
+
+ for (int x = 0; x < out_width; x++) {
+ // Apply transformation matrix applied to first image
+ const float xpos1 = x * transMatA[0] + y * transMatA[1] + transMatA[2];
+ const float ypos1 = x * transMatA[3] + y * transMatA[4] + transMatA[5];
+
+ const int srcXIdx =
+ ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) * 2 + 0;
+ const int srcYIdx = srcXIdx + 1;
+
+ const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)];
+ const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)];
+
+ // Apply inverse of the transformation matrix applied to second image
+ const float xpos3 = xpos2 * transMatB[0] + ypos2 * transMatB[1] + transMatB[2];
+ const float ypos3 = xpos2 * transMatB[3] + ypos2 * transMatB[4] + transMatB[5];
+
+ output_ptr[(outputIdxOffset + x) * 2 + 0] = xpos3 - (float)x;
+ output_ptr[(outputIdxOffset + x) * 2 + 1] = ypos3 - (float)y;
+ }
+ }
+ }
+}
+
+template<typename Device>
+class FlowAugmentation : public OpKernel {
+ public:
+ explicit FlowAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the crop [height, width] tensor and verify its dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
+ OP_REQUIRES(ctx, crop_.size() == 2,
+ errors::InvalidArgument("crop must be 2 dimensions"));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images and transforms and verify their dimensions
+ const Tensor& flows_t = ctx->input(0);
+ const Tensor& transforms_from_a_t = ctx->input(1);
+ const Tensor& transforms_from_b_t = ctx->input(2);
+
+ OP_REQUIRES(ctx, flows_t.dims() == 4,
+ errors::InvalidArgument("Input images must have rank 4"));
+ OP_REQUIRES(ctx,
+ (TensorShapeUtils::IsMatrix(transforms_from_a_t.shape()) &&
+ transforms_from_a_t.dim_size(0) ==
+ flows_t.dim_size(0) &&
+ transforms_from_a_t.dim_size(1) == 6),
+ errors::InvalidArgument(
+ "Input transforms_from_a should be num_images x 6"));
+ OP_REQUIRES(ctx,
+ (TensorShapeUtils::IsMatrix(transforms_from_b_t.shape()) &&
+ transforms_from_b_t.dim_size(0) ==
+ flows_t.dim_size(0) &&
+ transforms_from_b_t.dim_size(1) == 6),
+ errors::InvalidArgument(
+ "Input transforms_from_b should be num_images x 6"));
+
+ // Allocate the memory for the output
+ Tensor *output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(
+ 0,
+ TensorShape({ flows_t.dim_size(0), crop_[0], crop_[1],
+ flows_t.dim_size(3) }), &output_t));
+
+ // Perform flow augmentation
+ auto flows = flows_t.tensor<float, 4>();
+ auto transforms_from_a = transforms_from_a_t.tensor<float, 2>();
+ auto transforms_from_b = transforms_from_b_t.tensor<float, 2>();
+ auto output = output_t->tensor<float, 4>();
+
+ FillFlowAugmentation(ctx->eigen_device<Device>(),
+ output,
+ flows,
+ transforms_from_a,
+ transforms_from_b);
+ }
+
+ private:
+ std::vector<int32>crop_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("FlowAugmentation")
+ .Device(DEVICE_CPU),
+ FlowAugmentation<CPUDevice>)
+
+#if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("FlowAugmentation")
+ .Device(DEVICE_GPU),
+ FlowAugmentation<GPUDevice>)
+#endif // GOOGLE_CUDA
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h
new file mode 100644
index 0000000..7795991
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h
@@ -0,0 +1,19 @@
+#ifndef FLOWNET_FLOW_AUG_H_
+#define FLOWNET_FLOW_AUG_H_
+
+// See docs in ../ops/image_ops.cc.
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+template<class Device>
+void FillFlowAugmentation(const Device& device,
+ typename TTypes<float, 4>::Tensor output,
+ typename TTypes<float, 4>::ConstTensor flows,
+ typename TTypes<float, 2>::ConstTensor transforms_from_a,
+ typename TTypes<float, 2>::ConstTensor transforms_from_b);
+} // end namespace tensorflow
+
+#endif // FLOWNET_FLOW_AUG_H_
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc
new file mode 100644
index 0000000..7e10864
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc
@@ -0,0 +1,95 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "flow_augmentation.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+inline __device__ __host__ int clamp(int f, int a, int b) {
+ return max(a, min(f, b));
+}
+
+__global__ void FillFlowAugmentationKernel(
+ const int32 nthreads,
+ const float *flow_ptr,
+ const float *transforms_from_a,
+ const float *inv_transforms_from_b,
+ const int src_total_count, const int src_height, const int src_width,
+ const int batch_size, const int out_height,
+ const int out_width, float *output_ptr) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ const float x = (float)(index % out_width);
+ const float y = (float)((index / out_width) % out_height);
+ const int n = (index / out_width / out_height);
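+    // One thread per output pixel: decode the flat index into (n, y, x);
+    // both flow channels of that pixel are written at the end of the loop body.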
+
+ const int transformIdx = n * 6;
+
+    // Apply transformation matrix applied to first image
+ const float xpos1 = x * transforms_from_a[transformIdx + 0]
+ + y * transforms_from_a[transformIdx + 1]
+ + transforms_from_a[transformIdx + 2];
+ const float ypos1 = x * transforms_from_a[transformIdx + 3]
+ + y * transforms_from_a[transformIdx + 4]
+ + transforms_from_a[transformIdx + 5];
+
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ const int srcXIdx =
+ ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) *
+ 2 + 0;
+ const int srcYIdx = srcXIdx + 1;
+
+ const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)];
+ const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)];
+
+    // Apply inverse of the transformation matrix applied to second image
+ const float xpos3 = xpos2 * inv_transforms_from_b[transformIdx + 0]
+ + ypos2 * inv_transforms_from_b[transformIdx + 1]
+ + inv_transforms_from_b[transformIdx + 2];
+ const float ypos3 = xpos2 * inv_transforms_from_b[transformIdx + 3]
+ + ypos2 * inv_transforms_from_b[transformIdx + 4]
+ + inv_transforms_from_b[transformIdx + 5];
+
+ output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 0] = xpos3 -
+ x;
+ output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 1] = ypos3 -
+ y;
+ }
+}
+
+template<>
+void FillFlowAugmentation(const GPUDevice& device,
+ typename TTypes<float, 4>::Tensor output,
+ typename TTypes<float, 4>::ConstTensor flows,
+ typename TTypes<const float, 2>::ConstTensor transforms_from_a,
+ typename TTypes<const float, 2>::ConstTensor transforms_from_b) {
+ const int batch_size = output.dimension(0);
+ const int out_height = output.dimension(1);
+ const int out_width = output.dimension(2);
+ const int depth = 2;
+ const int total_count = batch_size * out_height * out_width * depth;
+ const int src_total_count = flows.dimension(0) * flows.dimension(1) *
+ flows.dimension(2) * flows.dimension(3);
+
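+  // total_count counts individual floats (2 per pixel); each kernel thread
+  // handles one pixel, hence the division by 2.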
+ CudaLaunchConfig config = GetCudaLaunchConfig(total_count / 2, device);
+
+ FillFlowAugmentationKernel << < config.block_count, config.thread_per_block, 0,
+ device.stream() >> > (
+ total_count / 2, flows.data(), transforms_from_a.data(),
+ transforms_from_b.data(),
+ src_total_count, flows.dimension(1), flows.dimension(2), batch_size,
+ out_height, out_width, output.data());
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/preprocessing/preprocessing.cc b/Codes/flownet2/src/ops/preprocessing/preprocessing.cc
new file mode 100644
index 0000000..086a0d0
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/preprocessing.cc
@@ -0,0 +1,96 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::DimensionHandle;
+
+Status SetOutputToSizedImage(InferenceContext *c) {
+ ShapeHandle input;
+
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+ DimensionHandle batch = c->Dim(input, 0);
+ DimensionHandle depth = c->Dim(input, 3);
+ std::vector<int32> crop_;
+ c->GetAttr("crop", &crop_);
+ DimensionHandle height = c->MakeDim(crop_[0]);
+ DimensionHandle width = c->MakeDim(crop_[1]);
+ c->set_output(0, c->MakeShape({ batch, height, width, depth }));
+ return Status::OK();
+}
+
+REGISTER_OP("DataAugmentation")
+.Input("image_a: float32")
+.Input("image_b: float32")
+.Input("global_step: int64")
+.Attr("crop: list(int) >= 2")
+.Attr("params_a_name: list(string)")
+.Attr("params_a_rand_type: list(string)")
+.Attr("params_a_exp: list(bool)")
+.Attr("params_a_mean: list(float)")
+.Attr("params_a_spread: list(float)")
+.Attr("params_a_prob: list(float)")
+.Attr("params_a_coeff_schedule: list(float)")
+.Attr("params_b_name: list(string)")
+.Attr("params_b_rand_type: list(string)")
+.Attr("params_b_exp: list(bool)")
+.Attr("params_b_mean: list(float)")
+.Attr("params_b_spread: list(float)")
+.Attr("params_b_prob: list(float)")
+.Attr("params_b_coeff_schedule: list(float)")
+.Output("aug_image_a: float32")
+.Output("aug_image_b: float32")
+.Output("transforms_from_a: float32")
+.Output("transforms_from_b: float32")
+.SetShapeFn([](InferenceContext *c) {
+ // Verify input A and input B both have 4 dimensions
+ ShapeHandle input_shape_a, input_shape_b;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape_a));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_shape_b));
+
+ // TODO: Verify params vectors all have the same length
+
+ // TODO: Move this out of here and into Compute
+ // Verify input A and input B are the same shape
+ DimensionHandle batch_size, unused;
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 0),
+ c->Value(c->Dim(input_shape_b, 0)),
+ &batch_size));
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 1),
+ c->Value(c->Dim(input_shape_b, 1)), &unused));
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 2),
+ c->Value(c->Dim(input_shape_b, 2)), &unused));
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 3),
+ c->Value(c->Dim(input_shape_b, 3)), &unused));
+
+ // Get cropping dimensions
+ std::vector<int32>crop_;
+ TF_RETURN_IF_ERROR(c->GetAttr("crop", &crop_));
+
+ // Reshape input shape to cropped shape
+ TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 1, c->MakeDim(crop_[0]),
+ &input_shape_a));
+ TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 2, c->MakeDim(crop_[1]),
+ &input_shape_a));
+
+ // Set output images shapes
+ c->set_output(0, input_shape_a);
+ c->set_output(1, input_shape_a);
+
+ // Set output spatial transforms shapes
+ c->set_output(2, c->MakeShape({ batch_size, 6 }));
+ c->set_output(3, c->MakeShape({ batch_size, 6 }));
+
+ return Status::OK();
+ });
+
+REGISTER_OP("FlowAugmentation")
+.Input("flows: float32")
+.Input("transforms_from_a: float32")
+.Input("transforms_from_b: float32")
+.Attr("crop: list(int) >= 2")
+.Output("transformed_flows: float32")
+.SetShapeFn(SetOutputToSizedImage);
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/training_schedules.py b/Codes/flownet2/src/training_schedules.py
new file mode 100644
index 0000000..4db5aab
--- /dev/null
+++ b/Codes/flownet2/src/training_schedules.py
@@ -0,0 +1,12 @@
+LONG_SCHEDULE = {
+ 'step_values': [400000, 600000, 800000, 1000000],
+ 'learning_rates': [0.0001, 0.00005, 0.000025, 0.0000125, 0.00000625],
+ 'momentum': 0.9,
+ 'momentum2': 0.999,
+ 'weight_decay': 0.0004,
+ 'max_iter': 1200000,
+}
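+
+# Hypothetical usage sketch (not part of this commit): the step_values /
+# learning_rates pair is shaped for a piecewise-constant decay, e.g.
+#   lr = tf.train.piecewise_constant(global_step,
+#                                    LONG_SCHEDULE['step_values'],
+#                                    LONG_SCHEDULE['learning_rates'])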
+
+FINETUNE_SCHEDULE = {
+ # TODO: Finetune schedule
+}
diff --git a/Codes/flownet2/src/utils.py b/Codes/flownet2/src/utils.py
new file mode 100644
index 0000000..f6abe18
--- /dev/null
+++ b/Codes/flownet2/src/utils.py
@@ -0,0 +1,46 @@
+import tensorflow as tf
+
+
+# Thanks, https://github.com/tensorflow/tensorflow/issues/4079
+def LeakyReLU(x, leak=0.1, name="lrelu"):
+ with tf.variable_scope(name):
+ f1 = 0.5 * (1.0 + leak)
+ f2 = 0.5 * (1.0 - leak)
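+        # f1 * x + f2 * |x| evaluates to x for x >= 0 and leak * x for x < 0,
+        # i.e. max(x, leak * x) written without tf.maximum.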
+ return f1 * x + f2 * abs(x)
+
+
+def average_endpoint_error(labels, predictions):
+ """
+ Given labels and predictions of size (N, H, W, 2), calculates average endpoint error:
+ sqrt[sum_across_channels{(X - Y)^2}]
+ """
+ num_samples = predictions.shape.as_list()[0]
+ with tf.name_scope(None, "average_endpoint_error", (predictions, labels)) as scope:
+ predictions = tf.to_float(predictions)
+ labels = tf.to_float(labels)
+ predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+
+ squared_difference = tf.square(tf.subtract(predictions, labels))
+ # sum across channels: sum[(X - Y)^2] -> N, H, W, 1
+ loss = tf.reduce_sum(squared_difference, 3, keep_dims=True)
+ loss = tf.sqrt(loss)
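+        # Sum the per-pixel endpoint errors over the whole batch and divide by
+        # the number of samples (not by the number of pixels).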
+ return tf.reduce_sum(loss) / num_samples
+
+
+def pad(tensor, num=1):
+ """
+ Pads the given tensor along the height and width dimensions with `num` 0s on each side
+ """
+ return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT")
+
+
+def antipad(tensor, num=1):
+ """
+    Performs a center crop. "Padding" for a deconvolutional layer (conv2d transpose)
+    removes padding from the output rather than adding it to the input.
+ """
+ batch, h, w, c = tensor.get_shape().as_list()
+ # print(batch, h, w, c)
+ # print(type(batch), type(h), type(w), type(c))
+ # return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c])
+ return tensor[:, num: num + h - 2 * num, num: num + w - 2 * num, :]