author    StevenLiuWen <liuwen@shanghaitech.edu.cn>  2018-03-13 03:28:06 -0400
committer StevenLiuWen <liuwen@shanghaitech.edu.cn>  2018-03-13 03:28:06 -0400
commit    fede6ca1dd0077ff509d84bd24028cc7a93bb119 (patch)
tree      af7f6e759b5dec4fc2964daed09e903958b919ed /Codes
first commit
Diffstat (limited to 'Codes')
-rw-r--r--  Codes/constant.py  153
-rw-r--r--  Codes/evaluate.py  576
-rw-r--r--  Codes/flownet2/.gitignore  9
-rw-r--r--  Codes/flownet2/LICENSE  21
-rw-r--r--  Codes/flownet2/Makefile  82
-rw-r--r--  Codes/flownet2/README.md  66
-rw-r--r--  Codes/flownet2/__init__.py  0
-rw-r--r--  Codes/flownet2/corr.py  45
-rw-r--r--  Codes/flownet2/src/__init__.py  0
-rw-r--r--  Codes/flownet2/src/correlation.py  35
-rw-r--r--  Codes/flownet2/src/dataloader.py  329
-rw-r--r--  Codes/flownet2/src/dataset_configs.py  153
-rw-r--r--  Codes/flownet2/src/downsample.py  8
-rw-r--r--  Codes/flownet2/src/flow_warp.py  15
-rw-r--r--  Codes/flownet2/src/flowlib.py  554
-rw-r--r--  Codes/flownet2/src/flownet2/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet2/flownet2.py  118
-rw-r--r--  Codes/flownet2/src/flownet2/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet2/train.py  24
-rw-r--r--  Codes/flownet2/src/flownet_c/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_c/flownet_c.py  167
-rw-r--r--  Codes/flownet2/src/flownet_c/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_c/train.py  19
-rw-r--r--  Codes/flownet2/src/flownet_cs/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_cs/flownet_cs.py  41
-rw-r--r--  Codes/flownet2/src/flownet_cs/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_cs/train.py  21
-rw-r--r--  Codes/flownet2/src/flownet_css/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_css/flownet_css.py  41
-rw-r--r--  Codes/flownet2/src/flownet_css/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_css/train.py  22
-rw-r--r--  Codes/flownet2/src/flownet_s/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_s/flownet_s.py  161
-rw-r--r--  Codes/flownet2/src/flownet_s/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_s/train.py  19
-rw-r--r--  Codes/flownet2/src/flownet_sd/__init__.py  0
-rw-r--r--  Codes/flownet2/src/flownet_sd/flownet_sd.py  160
-rw-r--r--  Codes/flownet2/src/flownet_sd/test.py  51
-rw-r--r--  Codes/flownet2/src/flownet_sd/train.py  19
-rw-r--r--  Codes/flownet2/src/net.py  177
-rw-r--r--  Codes/flownet2/src/ops/build/.gitkeep  0
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc  160
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc  262
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_kernel.cc  137
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc  153
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_kernel.h  77
-rw-r--r--  Codes/flownet2/src/ops/correlation/correlation_op.cc  83
-rw-r--r--  Codes/flownet2/src/ops/correlation/pad.cu.cc  76
-rw-r--r--  Codes/flownet2/src/ops/correlation/pad.h  20
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_kernel.cc  47
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_kernel.h  18
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc  108
-rw-r--r--  Codes/flownet2/src/ops/downsample/downsample_op.cc  30
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp.cc  48
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc  130
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp.h  28
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc  57
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc  126
-rw-r--r--  Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc  23
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc  420
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h  228
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc  461
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc  348
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h  22
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc  129
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h  19
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc  95
-rw-r--r--  Codes/flownet2/src/ops/preprocessing/preprocessing.cc  96
-rw-r--r--  Codes/flownet2/src/training_schedules.py  12
-rw-r--r--  Codes/flownet2/src/utils.py  46
-rw-r--r--  Codes/flownet2/test.py  163
-rw-r--r--  Codes/inference.py  149
-rw-r--r--  Codes/loss_functions.py  54
-rw-r--r--  Codes/models.py  44
-rw-r--r--  Codes/models/download_pretrains.sh  11
-rw-r--r--  Codes/pix2pix.py  274
-rw-r--r--  Codes/requirements.txt  9
-rw-r--r--  Codes/runner.sh  55
-rw-r--r--  Codes/train.py  215
-rw-r--r--  Codes/training_hyper_params/hyper_params.ini  103
-rw-r--r--  Codes/unet.py  42
-rw-r--r--  Codes/utils.py  227
82 files changed, 8146 insertions, 0 deletions
diff --git a/Codes/constant.py b/Codes/constant.py
new file mode 100644
index 0000000..eafeab9
--- /dev/null
+++ b/Codes/constant.py
@@ -0,0 +1,153 @@
+import os
+import argparse
+import configparser
+
+
+def get_dir(directory):
+ """
+ Return the directory path, creating the directory first if it does not exist.
+
+ @param directory: The new directory.
+ """
+
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+
+ return directory
+
+
+def parser_args():
+ parser = argparse.ArgumentParser(description='Options to run the network.')
+ parser.add_argument('-g', '--gpu', type=str, default='0',
+ help='the device id of gpu.')
+ parser.add_argument('-i', '--iters', type=int, default=1,
+ help='set the number of iterations, default is 1')
+ parser.add_argument('-b', '--batch', type=int, default=4,
+ help='set the batch size, default is 4.')
+ parser.add_argument('--num_his', type=int, default=4,
+ help='set the time steps, default is 4.')
+
+ parser.add_argument('-d', '--dataset', type=str,
+ help='the name of dataset.')
+ parser.add_argument('--train_folder', type=str, default='',
+ help='set the training folder path.')
+ parser.add_argument('--test_folder', type=str, default='',
+ help='set the testing folder path.')
+
+ parser.add_argument('--config', type=str, default='training_hyper_params/hyper_params.ini',
+ help='the path of training_hyper_params, default is training_hyper_params/hyper_params.ini')
+
+ parser.add_argument('--snapshot_dir', type=str, default='',
+ help='if it is a folder, then it is the directory to save models, '
+ 'if it is a specific model.ckpt-xxx, then the system will load it for testing.')
+ parser.add_argument('--summary_dir', type=str, default='', help='the directory to save summaries.')
+ parser.add_argument('--psnr_dir', type=str, default='', help='the directory to save psnr results during testing.')
+
+ parser.add_argument('--evaluate', type=str, default='compute_auc',
+ help='the evaluation metric, default is compute_auc')
+
+ return parser.parse_args()
+
+
+class Const(object):
+ class ConstError(TypeError):
+ pass
+
+ class ConstCaseError(ConstError):
+ pass
+
+ def __setattr__(self, name, value):
+ if name in self.__dict__:
+ raise self.ConstError("Can't change const.{}".format(name))
+ if not name.isupper():
+ raise self.ConstCaseError('const name {} is not all uppercase'.format(name))
+
+ self.__dict__[name] = value
+
+ def __str__(self):
+ _str = '<================ Constants information ================>\n'
+ for name, value in self.__dict__.items():
+ print(name, value)
+ _str += '\t{}\t{}\n'.format(name, value)
+
+ return _str
+
+
+args = parser_args()
+const = Const()
+
+# inputs constants
+const.DATASET = args.dataset
+const.TRAIN_FOLDER = args.train_folder
+const.TEST_FOLDER = args.test_folder
+
+const.GPU = args.gpu
+
+const.BATCH_SIZE = args.batch
+const.NUM_HIS = args.num_his
+const.ITERATIONS = args.iters
+
+const.EVALUATE = args.evaluate
+
+# network constants
+const.HEIGHT = 256
+const.WIDTH = 256
+const.FLOWNET_CHECKPOINT = 'flownet2/checkpoints/FlowNetSD/flownet-SD.ckpt-0'
+const.FLOW_HEIGHT = 384
+const.FLOW_WIDTH = 512
+
+# set training hyper-parameters of different datasets
+config = configparser.ConfigParser()
+assert config.read(args.config)
+
+# for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+const.L_NUM = config.getint(const.DATASET, 'L_NUM')
+# the power to which each gradient term is raised in GDL loss
+const.ALPHA_NUM = config.getint(const.DATASET, 'ALPHA_NUM')
+# the percentage of the adversarial loss to use in the combined loss
+const.LAM_ADV = config.getfloat(const.DATASET, 'LAM_ADV')
+# the percentage of the lp loss to use in the combined loss
+const.LAM_LP = config.getfloat(const.DATASET, 'LAM_LP')
+# the percentage of the GDL loss to use in the combined loss
+const.LAM_GDL = config.getfloat(const.DATASET, 'LAM_GDL')
+# the percentage of the optical flow (motion) loss to use in the combined loss
+const.LAM_FLOW = config.getfloat(const.DATASET, 'LAM_FLOW')
+
+# Learning rate of generator
+const.LRATE_G = eval(config.get(const.DATASET, 'LRATE_G'))
+const.LRATE_G_BOUNDARIES = eval(config.get(const.DATASET, 'LRATE_G_BOUNDARIES'))
+
+# Learning rate of discriminator
+const.LRATE_D = eval(config.get(const.DATASET, 'LRATE_D'))
+const.LRATE_D_BOUNDARIES = eval(config.get(const.DATASET, 'LRATE_D_BOUNDARIES'))
+
+
+const.SAVE_DIR = '{dataset}_l_{L_NUM}_alpha_{ALPHA_NUM}_lp_{LAM_LP}_' \
+ 'adv_{LAM_ADV}_gdl_{LAM_GDL}_flow_{LAM_FLOW}'.format(dataset=const.DATASET,
+ L_NUM=const.L_NUM,
+ ALPHA_NUM=const.ALPHA_NUM,
+ LAM_LP=const.LAM_LP, LAM_ADV=const.LAM_ADV,
+ LAM_GDL=const.LAM_GDL, LAM_FLOW=const.LAM_FLOW)
+
+if args.snapshot_dir:
+ # if snapshot_dir is a specific model.ckpt-xxx, it is a single model loaded for testing.
+ if os.path.exists(args.snapshot_dir + '.meta') or os.path.exists(args.snapshot_dir + '.data-00000-of-00001') or \
+ os.path.exists(args.snapshot_dir + '.index'):
+ const.SNAPSHOT_DIR = args.snapshot_dir
+ print(const.SNAPSHOT_DIR)
+ else:
+ const.SNAPSHOT_DIR = get_dir(os.path.join('models', const.SAVE_DIR + '_' + args.snapshot_dir))
+else:
+ const.SNAPSHOT_DIR = get_dir(os.path.join('models', const.SAVE_DIR))
+
+if args.summary_dir:
+ const.SUMMARY_DIR = get_dir(os.path.join('summary', const.SAVE_DIR + '_' + args.summary_dir))
+else:
+ const.SUMMARY_DIR = get_dir(os.path.join('summary', const.SAVE_DIR))
+
+if args.psnr_dir:
+ const.PSNR_DIR = get_dir(os.path.join('psnrs', const.SAVE_DIR + '_' + args.psnr_dir))
+else:
+ const.PSNR_DIR = get_dir(os.path.join('psnrs', const.SAVE_DIR))
+
+
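A note on the `Const` class above: each attribute may be assigned exactly once and must be all uppercase, so configuration values cannot be silently overwritten later in the code. Below is a minimal sketch of that behaviour, assuming the class is used in isolation (importing `constant.py` itself also parses command-line arguments and reads the config file at import time); the attribute names are illustrative only.

```
# Sketch of Const behaviour; HEIGHT/height are illustrative names.
const = Const()

const.HEIGHT = 256            # first assignment of an uppercase name is fine
try:
    const.HEIGHT = 512        # reassignment raises ConstError
except Const.ConstError as err:
    print(err)                # Can't change const.HEIGHT

try:
    const.height = 256        # lowercase names raise ConstCaseError
except Const.ConstCaseError as err:
    print(err)
```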
diff --git a/Codes/evaluate.py b/Codes/evaluate.py
new file mode 100644
index 0000000..2bce871
--- /dev/null
+++ b/Codes/evaluate.py
@@ -0,0 +1,576 @@
+import numpy as np
+import scipy.io as scio
+import os
+import argparse
+import pickle
+from sklearn import metrics
+import json
+import socket
+
+
+# data folder containing all datasets, such as ped1, ped2, avenue, shanghaitech, etc.
+# DATA_DIR = '../Data'
+hostname = socket.gethostname()
+if hostname == 'dl-T8520-G10': # 119
+ DATA_DIR = '/home/liuwen/ssd/datasets'
+elif hostname == 'admin' or hostname == 'compute101' or hostname == 'compute113' or hostname == 'compute106' \
+ or hostname == 'compute107' or hostname == 'compute114': # node02
+ DATA_DIR = '/home/luowx/liuwen/datasets'
+elif hostname in ('gpu13', 'gpu14'):
+ DATA_DIR = '/public/home/gaoshenghua/liuwen/datasets'
+else:
+ # raise NotImplementedError('Not found this machine {}!'.format(hostname))
+ DATA_DIR = '../Data'
+
+
+# normalize scores in each sub video
+NORMALIZE = True
+
+# number of history frames; in the prediction-based method the first 4 frames cannot be predicted,
+# so they are undecidable and we simply ignore them when computing scores
+DECIDABLE_IDX = 4
+
+
+def parser_args():
+ parser = argparse.ArgumentParser(description='evaluating the model, computing the roc/auc.')
+
+ parser.add_argument('-f', '--file', type=str, help='the path of loss file.')
+ parser.add_argument('-t', '--type', type=str, default='compute_auc',
+ help='the type of evaluation; available types are: plot_roc, compute_auc, '
+ 'test_func. The default type is compute_auc.')
+ return parser.parse_args()
+
+
+class RecordResult(object):
+ def __init__(self, fpr=None, tpr=None, auc=-np.inf, dataset=None, loss_file=None):
+ self.fpr = fpr
+ self.tpr = tpr
+ self.auc = auc
+ self.dataset = dataset
+ self.loss_file = loss_file
+
+ def __lt__(self, other):
+ return self.auc < other.auc
+
+ def __gt__(self, other):
+ return self.auc > other.auc
+
+ def __str__(self):
+ return 'dataset = {}, loss file = {}, auc = {}'.format(self.dataset, self.loss_file, self.auc)
+
+
+class GroundTruthLoader(object):
+ AVENUE = 'avenue'
+ PED1 = 'ped1'
+ PED1_PIXEL_SUBSET = 'ped1_pixel_subset'
+ PED2 = 'ped2'
+ ENTRANCE = 'enter'
+ EXIT = 'exit'
+ SHANGHAITECH = 'shanghaitech'
+ SHANGHAITECH_LABEL_PATH = os.path.join(DATA_DIR, 'shanghaitech/testing/test_frame_mask')
+ TOY_DATA = 'toydata'
+ TOY_DATA_LABEL_PATH = os.path.join(DATA_DIR, TOY_DATA, 'toydata.json')
+
+ NAME_MAT_MAPPING = {
+ AVENUE: os.path.join(DATA_DIR, 'avenue/avenue.mat'),
+ PED1: os.path.join(DATA_DIR, 'ped1/ped1.mat'),
+ PED2: os.path.join(DATA_DIR, 'ped2/ped2.mat'),
+ ENTRANCE: os.path.join(DATA_DIR, 'enter/enter.mat'),
+ EXIT: os.path.join(DATA_DIR, 'exit/exit.mat')
+ }
+
+ NAME_FRAMES_MAPPING = {
+ AVENUE: os.path.join(DATA_DIR, 'avenue/testing/frames'),
+ PED1: os.path.join(DATA_DIR, 'ped1/testing/frames'),
+ PED2: os.path.join(DATA_DIR, 'ped2/testing/frames'),
+ ENTRANCE: os.path.join(DATA_DIR, 'enter/testing/frames'),
+ EXIT: os.path.join(DATA_DIR, 'exit/testing/frames')
+ }
+
+ def __init__(self, mapping_json=None):
+ """
+ Initialize a ground truth loader, which loads the ground truth for a given dataset name.
+
+ :param mapping_json: the mapping from dataset name to the path of ground truth.
+ """
+
+ if mapping_json is not None:
+ with open(mapping_json, 'rb') as json_file:
+ self.mapping = json.load(json_file)
+ else:
+ self.mapping = GroundTruthLoader.NAME_MAT_MAPPING
+
+ def __call__(self, dataset):
+ """ get the ground truth by provided the name of dataset.
+
+ :type dataset: str
+ :param dataset: the name of dataset.
+ :return: a list of np.ndarray, one per video;
+ gt[0] contains the frame-level binary labels of video 0,
+ and its shape is (#frames,)
+ """
+
+ if dataset == GroundTruthLoader.SHANGHAITECH:
+ gt = self.__load_shanghaitech_gt()
+ elif dataset == GroundTruthLoader.TOY_DATA:
+ gt = self.__load_toydata_gt()
+ else:
+ gt = self.__load_ucsd_avenue_subway_gt(dataset)
+ return gt
+
+ def __load_ucsd_avenue_subway_gt(self, dataset):
+ assert dataset in self.mapping, 'there is no dataset named {} \n Please check {}' \
+ .format(dataset, GroundTruthLoader.NAME_MAT_MAPPING.keys())
+
+ mat_file = self.mapping[dataset]
+ abnormal_events = scio.loadmat(mat_file, squeeze_me=True)['gt']
+
+ if abnormal_events.ndim == 2:
+ abnormal_events = abnormal_events.reshape(-1, abnormal_events.shape[0], abnormal_events.shape[1])
+
+ num_video = abnormal_events.shape[0]
+ dataset_video_folder = GroundTruthLoader.NAME_FRAMES_MAPPING[dataset]
+ video_list = os.listdir(dataset_video_folder)
+ video_list.sort()
+
+ assert num_video == len(video_list), 'ground truth does not match the number of testing videos. {} != {}' \
+ .format(num_video, len(video_list))
+
+ # get the total frames of sub video
+ def get_video_length(sub_video_number):
+ # video_name = video_name_template.format(sub_video_number)
+ video_name = os.path.join(dataset_video_folder, video_list[sub_video_number])
+ assert os.path.isdir(video_name), '{} is not directory!'.format(video_name)
+
+ length = len(os.listdir(video_name))
+
+ return length
+
+ # need to test [].append, or np.array().append(), which one is faster
+ gt = []
+ for i in range(num_video):
+ length = get_video_length(i)
+
+ sub_video_gt = np.zeros((length,), dtype=np.int8)
+ sub_abnormal_events = abnormal_events[i]
+ if sub_abnormal_events.ndim == 1:
+ sub_abnormal_events = sub_abnormal_events.reshape((sub_abnormal_events.shape[0], -1))
+
+ _, num_abnormal = sub_abnormal_events.shape
+
+ for j in range(num_abnormal):
+ # (start - 1, end - 1)
+ start = sub_abnormal_events[0, j] - 1
+ end = sub_abnormal_events[1, j]
+
+ sub_video_gt[start: end] = 1
+
+ gt.append(sub_video_gt)
+
+ return gt
+
+ @staticmethod
+ def __load_shanghaitech_gt():
+ video_path_list = os.listdir(GroundTruthLoader.SHANGHAITECH_LABEL_PATH)
+ video_path_list.sort()
+
+ gt = []
+ for video in video_path_list:
+ # print(os.path.join(GroundTruthLoader.SHANGHAITECH_LABEL_PATH, video))
+ gt.append(np.load(os.path.join(GroundTruthLoader.SHANGHAITECH_LABEL_PATH, video)))
+
+ return gt
+
+ @staticmethod
+ def __load_toydata_gt():
+ with open(GroundTruthLoader.TOY_DATA_LABEL_PATH, 'r') as gt_file:
+ gt_dict = json.load(gt_file)
+
+ gt = []
+ for video, video_info in gt_dict.items():
+ length = video_info['length']
+ video_gt = np.zeros((length,), dtype=np.uint8)
+ sub_gt = np.array(np.matrix(video_info['gt']))
+
+ for anomaly in sub_gt:
+ start = anomaly[0]
+ end = anomaly[1] + 1
+ video_gt[start: end] = 1
+ gt.append(video_gt)
+ return gt
+
+ @staticmethod
+ def get_pixel_masks_file_list(dataset):
+ # pixel mask folder
+ pixel_mask_folder = os.path.join(DATA_DIR, dataset, 'pixel_masks')
+ pixel_mask_file_list = os.listdir(pixel_mask_folder)
+ pixel_mask_file_list.sort()
+
+ # get all testing videos
+ dataset_video_folder = GroundTruthLoader.NAME_FRAMES_MAPPING[dataset]
+ video_list = os.listdir(dataset_video_folder)
+ video_list.sort()
+
+ # get all testing video names with pixel masks
+ pixel_video_ids = []
+ ids = 0
+ for pixel_mask_name in pixel_mask_file_list:
+ while ids < len(video_list):
+ if video_list[ids] + '.npy' == pixel_mask_name:
+ pixel_video_ids.append(ids)
+ ids += 1
+ break
+ else:
+ ids += 1
+
+ assert len(pixel_video_ids) == len(pixel_mask_file_list)
+
+ for i in range(len(pixel_mask_file_list)):
+ pixel_mask_file_list[i] = os.path.join(pixel_mask_folder, pixel_mask_file_list[i])
+
+ return pixel_mask_file_list, pixel_video_ids
+
+
+def load_psnr_gt(loss_file):
+ with open(loss_file, 'rb') as reader:
+ # results {
+ # 'dataset': the name of dataset
+ # 'psnr': the psnr of each testing videos,
+ # }
+
+ # psnr_records['psnr'] is np.array, shape(#videos)
+ # psnr_records[0] is np.array ------> 01.avi
+ # psnr_records[1] is np.array ------> 02.avi
+ # ......
+ # psnr_records[n] is np.array ------> xx.avi
+
+ results = pickle.load(reader)
+
+ dataset = results['dataset']
+ psnr_records = results['psnr']
+
+ num_videos = len(psnr_records)
+
+ # load ground truth
+ gt_loader = GroundTruthLoader()
+ gt = gt_loader(dataset=dataset)
+
+ assert num_videos == len(gt), 'the number of saved videos does not match the ground truth, {} != {}' \
+ .format(num_videos, len(gt))
+
+ return dataset, psnr_records, gt
+
+
+def load_psnr_gt_flow(loss_file):
+ with open(loss_file, 'rb') as reader:
+ # results {
+ # 'dataset': the name of dataset
+ # 'psnr': the psnr of each testing videos,
+ # }
+
+ # psnr_records['psnr'] is np.array, shape(#videos)
+ # psnr_records[0] is np.array ------> 01.avi
+ # psnr_records[1] is np.array ------> 02.avi
+ # ......
+ # psnr_records[n] is np.array ------> xx.avi
+
+ results = pickle.load(reader)
+
+ dataset = results['dataset']
+ psnrs = results['psnr']
+ flows = results['flow']
+
+ num_videos = len(psnrs)
+
+ # load ground truth
+ gt_loader = GroundTruthLoader()
+ gt = gt_loader(dataset=dataset)
+
+ assert num_videos == len(gt), 'the number of saved videos does not match the ground truth, {} != {}' \
+ .format(num_videos, len(gt))
+
+ return dataset, psnrs, flows, gt
+
+
+def load_psnr(loss_file):
+ """
+ load image psnr or optical flow psnr.
+ :param loss_file: loss file path
+ :return:
+ """
+ with open(loss_file, 'rb') as reader:
+ # results {
+ # 'dataset': the name of dataset
+ # 'psnr': the psnr of each testing videos,
+ # }
+
+ # psnr_records['psnr'] is np.array, shape(#videos)
+ # psnr_records[0] is np.array ------> 01.avi
+ # psnr_records[1] is np.array ------> 02.avi
+ # ......
+ # psnr_records[n] is np.array ------> xx.avi
+
+ results = pickle.load(reader)
+ psnrs = results['psnr']
+ return psnrs
+
+
+def get_scores_labels(loss_file):
+ # the name of dataset, loss, and ground truth
+ dataset, psnr_records, gt = load_psnr_gt(loss_file=loss_file)
+
+ # the number of videos
+ num_videos = len(psnr_records)
+
+ scores = np.array([], dtype=np.float32)
+ labels = np.array([], dtype=np.int8)
+ # video normalization
+ for i in range(num_videos):
+ distance = psnr_records[i]
+
+ if NORMALIZE:
+ distance -= distance.min() # distances = (distance - min) / (max - min)
+ distance /= distance.max()
+ # distance = 1 - distance
+
+ scores = np.concatenate((scores[:], distance[DECIDABLE_IDX:]), axis=0)
+ labels = np.concatenate((labels[:], gt[i][DECIDABLE_IDX:]), axis=0)
+ return dataset, scores, labels
+
+
+def precision_recall_auc(loss_file):
+ if not os.path.isdir(loss_file):
+ loss_file_list = [loss_file]
+ else:
+ loss_file_list = os.listdir(loss_file)
+ loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list]
+
+ optimal_results = RecordResult()
+ for sub_loss_file in loss_file_list:
+ dataset, scores, labels = get_scores_labels(sub_loss_file)
+ precision, recall, thresholds = metrics.precision_recall_curve(labels, scores, pos_label=0)
+ auc = metrics.auc(recall, precision)
+
+ results = RecordResult(recall, precision, auc, dataset, sub_loss_file)
+
+ if optimal_results < results:
+ optimal_results = results
+
+ if os.path.isdir(loss_file):
+ print(results)
+ print('##### optimal result and model = {}'.format(optimal_results))
+ return optimal_results
+
+
+def cal_eer(fpr, tpr):
+ # makes fpr + tpr = 1
+ eer = fpr[np.nanargmin(np.absolute((fpr + tpr - 1)))]
+ return eer
+
+
+def compute_eer(loss_file):
+ if not os.path.isdir(loss_file):
+ loss_file_list = [loss_file]
+ else:
+ loss_file_list = os.listdir(loss_file)
+ loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list]
+
+ optimal_results = RecordResult(auc=np.inf)
+ for sub_loss_file in loss_file_list:
+ dataset, scores, labels = get_scores_labels(sub_loss_file)
+ fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0)
+ eer = cal_eer(fpr, tpr)
+
+ results = RecordResult(fpr, tpr, eer, dataset, sub_loss_file)
+
+ if optimal_results > results:
+ optimal_results = results
+
+ if os.path.isdir(loss_file):
+ print(results)
+ print('##### optimal result and model = {}'.format(optimal_results))
+ return optimal_results
+
+
+def compute_auc(loss_file):
+ if not os.path.isdir(loss_file):
+ loss_file_list = [loss_file]
+ else:
+ loss_file_list = os.listdir(loss_file)
+ loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list]
+
+ optimal_results = RecordResult()
+ for sub_loss_file in loss_file_list:
+ # the name of dataset, loss, and ground truth
+ dataset, psnr_records, gt = load_psnr_gt(loss_file=sub_loss_file)
+
+ # the number of videos
+ num_videos = len(psnr_records)
+
+ scores = np.array([], dtype=np.float32)
+ labels = np.array([], dtype=np.int8)
+ # video normalization
+ for i in range(num_videos):
+ distance = psnr_records[i]
+
+ if NORMALIZE:
+ distance -= distance.min() # distances = (distance - min) / (max - min)
+ distance /= distance.max()
+ # distance = 1 - distance
+
+ scores = np.concatenate((scores, distance[DECIDABLE_IDX:]), axis=0)
+ labels = np.concatenate((labels, gt[i][DECIDABLE_IDX:]), axis=0)
+
+ fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0)
+ auc = metrics.auc(fpr, tpr)
+
+ results = RecordResult(fpr, tpr, auc, dataset, sub_loss_file)
+
+ if optimal_results < results:
+ optimal_results = results
+
+ if os.path.isdir(loss_file):
+ print(results)
+ print('##### optimal result and model = {}'.format(optimal_results))
+ return optimal_results
+
+
+def average_psnr(loss_file):
+ if not os.path.isdir(loss_file):
+ loss_file_list = [loss_file]
+ else:
+ loss_file_list = os.listdir(loss_file)
+ loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list]
+
+ max_avg_psnr = -np.inf
+ max_file = ''
+ for file in loss_file_list:
+ psnr_records = load_psnr(file)
+
+ psnr_records = np.concatenate(psnr_records, axis=0)
+ avg_psnr = np.mean(psnr_records)
+ if max_avg_psnr < avg_psnr:
+ max_avg_psnr = avg_psnr
+ max_file = file
+ print('{}, average psnr = {}'.format(file, avg_psnr))
+
+ print('max average psnr file = {}, psnr = {}'.format(max_file, max_avg_psnr))
+
+
+def calculate_psnr(loss_file):
+ optimal_result = compute_auc(loss_file)
+ print('##### optimal result and model = {}'.format(optimal_result))
+
+ mean_psnr = []
+ for file in os.listdir(loss_file):
+ file = os.path.join(loss_file, file)
+ dataset, psnr_records, gt = load_psnr_gt(file)
+
+ psnr_records = np.concatenate(psnr_records, axis=0)
+ gt = np.concatenate(gt, axis=0)
+
+ mean_normal_psnr = np.mean(psnr_records[gt == 0])
+ mean_abnormal_psnr = np.mean(psnr_records[gt == 1])
+ mean = np.mean(psnr_records)
+ print('mean normal psnr = {}, mean abnormal psnr = {}, mean = {}'.format(
+ mean_normal_psnr,
+ mean_abnormal_psnr,
+ mean)
+ )
+ mean_psnr.append(mean)
+ print('max mean psnr = {}'.format(np.max(mean_psnr)))
+
+
+def calculate_score(loss_file):
+ if not os.path.isdir(loss_file):
+ loss_file_path = loss_file
+ else:
+ optimal_result = compute_auc(loss_file)
+ loss_file_path = optimal_result.loss_file
+ print('##### optimal result and model = {}'.format(optimal_result))
+ dataset, psnr_records, gt = load_psnr_gt(loss_file=loss_file_path)
+
+ # the number of videos
+ num_videos = len(psnr_records)
+
+ scores = np.array([], dtype=np.float32)
+ labels = np.array([], dtype=np.int8)
+ # video normalization
+ for i in range(num_videos):
+ distance = psnr_records[i]
+
+ distance = (distance - distance.min()) / (distance.max() - distance.min())
+
+ scores = np.concatenate((scores, distance[DECIDABLE_IDX:]), axis=0)
+ labels = np.concatenate((labels, gt[i][DECIDABLE_IDX:]), axis=0)
+
+ mean_normal_scores = np.mean(scores[labels == 0])
+ mean_abnormal_scores = np.mean(scores[labels == 1])
+ print('mean normal scores = {}, mean abnormal scores = {}, '
+ 'delta = {}'.format(mean_normal_scores, mean_abnormal_scores, mean_normal_scores - mean_abnormal_scores))
+
+
+def test_func(*args):
+ # simulate testing on CUHK AVENUE dataset
+ dataset = GroundTruthLoader.AVENUE
+
+ # load the ground truth
+ gt_loader = GroundTruthLoader()
+ gt = gt_loader(dataset=dataset)
+
+ num_videos = len(gt)
+
+ simulated_results = {
+ 'dataset': dataset,
+ 'psnr': []
+ }
+
+ simulated_psnr = []
+ for i in range(num_videos):
+ sub_video_length = gt[i].shape[0]
+ simulated_psnr.append(np.random.random(size=sub_video_length))
+
+ simulated_results['psnr'] = simulated_psnr
+
+ # writing to file, 'generated_loss.bin'
+ with open('generated_loss.bin', 'wb') as writer:
+ pickle.dump(simulated_results, writer, pickle.HIGHEST_PROTOCOL)
+
+ print('generated_loss.bin')
+ result = compute_auc('generated_loss.bin')
+
+ print('optimal = {}'.format(result))
+
+
+eval_type_function = {
+ 'compute_auc': compute_auc,
+ 'compute_eer': compute_eer,
+ 'precision_recall_auc': precision_recall_auc,
+ 'calculate_psnr': calculate_psnr,
+ 'calculate_score': calculate_score,
+ 'average_psnr': average_psnr,
+ 'average_psnr_sample': average_psnr
+}
+
+
+def evaluate(eval_type, save_file):
+ assert eval_type in eval_type_function, 'there is no type of evaluation {}, please check {}' \
+ .format(eval_type, eval_type_function.keys())
+ eval_func = eval_type_function[eval_type]
+ optimal_results = eval_func(save_file)
+ return optimal_results
+
+
+if __name__ == '__main__':
+ args = parser_args()
+
+ eval_type = args.type
+ file_path = args.file
+
+ print('Evaluate type = {}'.format(eval_type))
+ print('File path = {}'.format(file_path))
+
+ if eval_type == 'test_func':
+ test_func()
+ else:
+ evaluate(eval_type, file_path) \ No newline at end of file
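evaluate.py consumes a pickled dictionary with a 'dataset' key and a 'psnr' key, where 'psnr' holds one per-frame score array per testing video; this is exactly what `test_func` generates. Below is a minimal sketch of producing such a file by hand. The video lengths are made up for illustration and would have to match the ground-truth lengths of the chosen dataset.

```
# Sketch of the loss-file format read by load_psnr_gt(); lengths are illustrative.
import pickle
import numpy as np

results = {
    'dataset': 'avenue',            # must be a dataset known to GroundTruthLoader
    'psnr': [
        np.random.random(1439),     # per-frame scores of testing video 01
        np.random.random(1211),     # per-frame scores of testing video 02
    ],
}

with open('demo_loss.bin', 'wb') as writer:
    pickle.dump(results, writer, pickle.HIGHEST_PROTOCOL)

# afterwards:  python evaluate.py --file demo_loss.bin --type compute_auc
```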
diff --git a/Codes/flownet2/.gitignore b/Codes/flownet2/.gitignore
new file mode 100644
index 0000000..31abf4e
--- /dev/null
+++ b/Codes/flownet2/.gitignore
@@ -0,0 +1,9 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.o
+*.so
+*.so.dSYM
+checkpoints/
+!checkpoints/download.sh
+!checkpoints/README.md
diff --git a/Codes/flownet2/LICENSE b/Codes/flownet2/LICENSE
new file mode 100644
index 0000000..d2cc224
--- /dev/null
+++ b/Codes/flownet2/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Sam Pepose
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Codes/flownet2/Makefile b/Codes/flownet2/Makefile
new file mode 100644
index 0000000..073c011
--- /dev/null
+++ b/Codes/flownet2/Makefile
@@ -0,0 +1,82 @@
+# Makefile
+
+TF_INC = `python -c "import tensorflow; print(tensorflow.sysconfig.get_include())"`
+
+ifndef CUDA_HOME
+ CUDA_HOME := /usr/local/cuda
+endif
+
+CC = gcc -O2 -pthread
+CXX = g++
+GPUCC = nvcc
+CFLAGS = -std=c++11 -I$(TF_INC) -I"$(CUDA_HOME)/include" -DGOOGLE_CUDA=1
+GPUCFLAGS = -c
+LFLAGS = -pthread -shared -fPIC
+GPULFLAGS = -x cu -Xcompiler -fPIC
+CGPUFLAGS = -L$(CUDA_HOME)/lib -L$(CUDA_HOME)/lib64 -lcudart
+
+OUT_DIR = src/ops/build
+PREPROCESSING_SRC = "src/ops/preprocessing/preprocessing.cc" "src/ops/preprocessing/kernels/flow_augmentation.cc" "src/ops/preprocessing/kernels/augmentation_base.cc" "src/ops/preprocessing/kernels/data_augmentation.cc"
+GPU_SRC_DATA_AUG = src/ops/preprocessing/kernels/data_augmentation.cu.cc
+GPU_SRC_FLOW = src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc
+GPU_PROD_DATA_AUG = $(OUT_DIR)/data_augmentation.o
+GPU_PROD_FLOW = $(OUT_DIR)/flow_augmentation_gpu.o
+PREPROCESSING_PROD = $(OUT_DIR)/preprocessing.so
+
+DOWNSAMPLE_SRC = "src/ops/downsample/downsample_kernel.cc" "src/ops/downsample/downsample_op.cc"
+GPU_SRC_DOWNSAMPLE = src/ops/downsample/downsample_kernel_gpu.cu.cc
+GPU_PROD_DOWNSAMPLE = $(OUT_DIR)/downsample_kernel_gpu.o
+DOWNSAMPLE_PROD = $(OUT_DIR)/downsample.so
+
+CORRELATION_SRC = "src/ops/correlation/correlation_kernel.cc" "src/ops/correlation/correlation_grad_kernel.cc" "src/ops/correlation/correlation_op.cc"
+GPU_SRC_CORRELATION = src/ops/correlation/correlation_kernel.cu.cc
+GPU_SRC_CORRELATION_GRAD = src/ops/correlation/correlation_grad_kernel.cu.cc
+GPU_SRC_PAD = src/ops/correlation/pad.cu.cc
+GPU_PROD_CORRELATION = $(OUT_DIR)/correlation_kernel_gpu.o
+GPU_PROD_CORRELATION_GRAD = $(OUT_DIR)/correlation_grad_kernel_gpu.o
+GPU_PROD_PAD = $(OUT_DIR)/correlation_pad_gpu.o
+CORRELATION_PROD = $(OUT_DIR)/correlation.so
+
+FLOWWARP_SRC = "src/ops/flow_warp/flow_warp_op.cc" "src/ops/flow_warp/flow_warp.cc" "src/ops/flow_warp/flow_warp_grad.cc"
+GPU_SRC_FLOWWARP = "src/ops/flow_warp/flow_warp.cu.cc"
+GPU_SRC_FLOWWARP_GRAD = "src/ops/flow_warp/flow_warp_grad.cu.cc"
+GPU_PROD_FLOWWARP = "$(OUT_DIR)/flow_warp_gpu.o"
+GPU_PROD_FLOWWARP_GRAD = "$(OUT_DIR)/flow_warp_grad_gpu.o"
+FLOWWARP_PROD = "$(OUT_DIR)/flow_warp.so"
+
+ifeq ($(OS),Windows_NT)
+ detected_OS := Windows
+else
+ detected_OS := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+endif
+ifeq ($(detected_OS),Darwin) # Mac OS X
+ CGPUFLAGS += -undefined dynamic_lookup
+endif
+ifeq ($(detected_OS),Linux)
+ CFLAGS += -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D__STRICT_ANSI__ -D_GLIBCXX_USE_CXX11_ABI=0
+endif
+
+all: preprocessing downsample correlation flowwarp
+
+preprocessing:
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_DATA_AUG) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_DATA_AUG)
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOW) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOW)
+ $(CXX) -g $(CFLAGS) $(PREPROCESSING_SRC) $(GPU_PROD_DATA_AUG) $(GPU_PROD_FLOW) $(LFLAGS) $(CGPUFLAGS) -o $(PREPROCESSING_PROD)
+
+downsample:
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_DOWNSAMPLE) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_DOWNSAMPLE)
+ $(CXX) -g $(CFLAGS) $(DOWNSAMPLE_SRC) $(GPU_PROD_DOWNSAMPLE) $(LFLAGS) $(CGPUFLAGS) -o $(DOWNSAMPLE_PROD)
+
+correlation:
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_CORRELATION) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_CORRELATION)
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_CORRELATION_GRAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_CORRELATION_GRAD)
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_PAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_PAD)
+ $(CXX) -g $(CFLAGS) $(CORRELATION_SRC) $(GPU_PROD_CORRELATION) $(GPU_PROD_CORRELATION_GRAD) $(GPU_PROD_PAD) $(LFLAGS) $(CGPUFLAGS) -o $(CORRELATION_PROD)
+
+flowwarp:
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOWWARP) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOWWARP)
+ $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOWWARP_GRAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOWWARP_GRAD)
+ $(CXX) -g $(CFLAGS) $(FLOWWARP_SRC) $(GPU_PROD_FLOWWARP) $(GPU_PROD_FLOWWARP_GRAD) $(LFLAGS) $(CGPUFLAGS) -o $(FLOWWARP_PROD)
+
+clean:
+ rm -f $(PREPROCESSING_PROD) $(GPU_PROD_FLOW) $(GPU_PROD_DATA_AUG) $(DOWNSAMPLE_PROD) $(GPU_PROD_DOWNSAMPLE)
diff --git a/Codes/flownet2/README.md b/Codes/flownet2/README.md
new file mode 100644
index 0000000..8647723
--- /dev/null
+++ b/Codes/flownet2/README.md
@@ -0,0 +1,66 @@
+## FlowNet2 (TensorFlow)
+
+This repo contains FlowNet2[1] for TensorFlow. It includes FlowNetC, S, CS, CSS, CSS-ft-sd, SD, and 2.
+
+### Installation
+```
+pip install enum
+pip install pypng
+pip install matplotlib
+pip install image
+pip install scipy
+pip install numpy
+pip install tensorflow
+```
+
+Linux:
+`sudo apt-get install python-tk`
+
+You must have CUDA installed:
+`make all`
+
+### Download weights
+To download the weights for all models (4.4GB), run the `download.sh` script in the `checkpoints` directory. All test scripts rely on these checkpoints to work properly.
+
+
+### Flow Generation (1 image pair)
+
+```
+python -m src.flownet2.test --input_a data/samples/0img0.ppm --input_b data/samples/0img1.ppm --out ./
+```
+
+Available models:
+* `flownet2`
+* `flownet_s`
+* `flownet_c`
+* `flownet_cs`
+* `flownet_css` (can edit test.py to use css-ft-sd weights)
+* `flownet_sd`
+
+If the installation is successful, you should get the following flow prediction for samples/0img0.ppm:
+![FlowNet2 Sample Prediction](/data/samples/0flow-pred-flownet2.png?raw=true)
+
+### Training
+If you would like to train any of the networks from scratch (replace `flownet2` with the appropriate model):
+```
+python -m src.flownet2.train
+```
+For stacked networks, previous network weights will be loaded and fixed. For example, if training CS, the C weights are loaded and fixed and the S weights are randomly initialized.
+
+
+### Fine-tuning
+TODO
+
+### Benchmarks
+Benchmarks measure a forward pass of each model on a pair of 512x384 images. All benchmarks were run on a K80 GPU and an Intel Xeon CPU E5-2682 v4 @ 2.30GHz, with TensorFlow 1.2.1 and Python 2.7.12 on Ubuntu 16.04. Times were averaged over 10 runs. The first run is always slower because it sets up the TensorFlow session.
+
+|                 | S         | C         | CS        | CSS        | SD        | 2          |
+| --------------- | --------- | --------- | --------- | ---------- | --------- | ---------- |
+| First Run       | 681.039ms | 898.792ms | 998.584ms | 1063.357ms | 933.806ms | 1882.003ms |
+| Subsequent Runs | 38.067ms  | 78.789ms  | 123.300ms | 161.186ms  | 62.061ms  | 276.641ms  |
+
+
+### Sources
+[1] E. Ilg, N. Mayer, T. Saikia, M. Keuper, A. Dosovitskiy, T. Brox
+FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks,
+IEEE Conference in Computer Vision and Pattern Recognition (CVPR), 2017.
diff --git a/Codes/flownet2/__init__.py b/Codes/flownet2/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/__init__.py
diff --git a/Codes/flownet2/corr.py b/Codes/flownet2/corr.py
new file mode 100644
index 0000000..3301d8c
--- /dev/null
+++ b/Codes/flownet2/corr.py
@@ -0,0 +1,45 @@
+import tensorflow as tf
+import numpy as np
+import math
+
+BATCH_SIZE = 8
+HEIGHT = 30
+WIDTH = 60
+CHANNELS = 3
+
+NEIGHBORHOOD_SIZE = 41
+MAX_DISPLACEMENT = int(math.ceil(NEIGHBORHOOD_SIZE / 2.0))
+STRIDE_2 = 2
+
+assert(STRIDE_2 <= NEIGHBORHOOD_SIZE)
+
+# Define two feature maps
+fmA = tf.ones((BATCH_SIZE, HEIGHT, WIDTH, CHANNELS), dtype=tf.int32)
+fmB = tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE, HEIGHT, WIDTH, CHANNELS)), dtype=tf.int32)
+
+depth = int(math.floor((2.0 * MAX_DISPLACEMENT + 1) / STRIDE_2) ** 2)
+
+print('Output should be size:', (BATCH_SIZE, HEIGHT, WIDTH, depth))
+print('Striding at values: ', [e for e in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2)])
+
+def main():
+ out = []
+ for i in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2): # height
+ for j in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2): # width
+ padded_a = tf.pad(fmA, [[0,0], [0, abs(i)], [0, abs(j)], [0, 0]])
+ padded_b = tf.pad(fmB, [[0, 0], [abs(i), 0], [abs(j), 0], [0, 0]])
+ m = padded_a * padded_b
+
+ height_start_idx = 0 if i <= 0 else i
+ height_end_idx = height_start_idx + HEIGHT
+ width_start_idx = 0 if j <= 0 else j
+ width_end_idx = width_start_idx + WIDTH
+ cut = m[:, height_start_idx:height_end_idx, width_start_idx:width_end_idx, :]
+
+ final = tf.reduce_sum(cut, 3)
+ out.append(final)
+ corr = tf.stack(out, 3)
+ print('Output size: ', corr.shape)
+
+
+main()
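corr.py brute-forces the correlation layer by shifting one padded feature map against the other and summing over channels; the number of output channels is fixed by NEIGHBORHOOD_SIZE and STRIDE_2. A quick sanity check of the `depth` formula with the constants used above:

```
# Expected number of correlation channels for NEIGHBORHOOD_SIZE = 41, STRIDE_2 = 2.
import math

neighborhood_size = 41
stride_2 = 2

max_displacement = int(math.ceil(neighborhood_size / 2.0))             # 21
depth = int(math.floor((2.0 * max_displacement + 1) / stride_2) ** 2)  # 21 * 21
print(max_displacement, depth)                                         # 21 441
```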
diff --git a/Codes/flownet2/src/__init__.py b/Codes/flownet2/src/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/__init__.py
diff --git a/Codes/flownet2/src/correlation.py b/Codes/flownet2/src/correlation.py
new file mode 100644
index 0000000..60a5c37
--- /dev/null
+++ b/Codes/flownet2/src/correlation.py
@@ -0,0 +1,35 @@
+import tensorflow as tf
+
+_correlation_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/correlation.so"))
+
+
+def correlation(input_a, input_b, kernel_size, max_displacement, stride_1, stride_2, padding):
+ return _correlation_ops.correlation(input_a,
+ input_b,
+ kernel_size,
+ max_displacement,
+ stride_1,
+ stride_2,
+ padding)
+
+
+@tf.RegisterGradient("Correlation")
+def _correlation_grad(corr_op, gradients):
+ kernel_size = corr_op.get_attr("kernel_size")
+ max_displacement = corr_op.get_attr("max_displacement")
+ stride_1 = corr_op.get_attr("stride_1")
+ stride_2 = corr_op.get_attr("stride_2")
+ pad = corr_op.get_attr("pad")
+
+ corr_grads = _correlation_ops.correlation_grad(gradients,
+ corr_op.inputs[0],
+ corr_op.inputs[1],
+ kernel_size,
+ max_displacement,
+ stride_1,
+ stride_2,
+ pad)
+
+ # Return the gradients with respect to input_a and input_b
+ return corr_grads.backprops_a, corr_grads.backprops_b
diff --git a/Codes/flownet2/src/dataloader.py b/Codes/flownet2/src/dataloader.py
new file mode 100644
index 0000000..22a6ddb
--- /dev/null
+++ b/Codes/flownet2/src/dataloader.py
@@ -0,0 +1,329 @@
+# -*- coding: utf-8 -*-
+import tensorflow as tf
+import copy
+slim = tf.contrib.slim
+
+_preprocessing_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/preprocessing.so"))
+
+
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py
+class Image(slim.tfexample_decoder.ItemHandler):
+ """An ItemHandler that decodes a parsed Tensor as an image."""
+
+ def __init__(self,
+ image_key=None,
+ format_key=None,
+ shape=None,
+ channels=3,
+ dtype=tf.uint8,
+ repeated=False):
+ """Initializes the image.
+ Args:
+ image_key: the name of the TF-Example feature in which the encoded image
+ is stored.
+ shape: the output shape of the image as 1-D `Tensor`
+ [height, width, channels]. If provided, the image is reshaped
+ accordingly. If left as None, no reshaping is done. A shape should
+ be supplied only if all the stored images have the same shape.
+ channels: the number of channels in the image.
+ dtype: images will be decoded at this bit depth. Different formats
+ support different bit depths.
+ See tf.image.decode_image,
+ tf.decode_raw,
+ repeated: if False, decodes a single image. If True, decodes a
+ variable number of image strings from a 1D tensor of strings.
+ """
+ if not image_key:
+ image_key = 'image/encoded'
+
+ super(Image, self).__init__([image_key])
+ self._image_key = image_key
+ self._shape = shape
+ self._channels = channels
+ self._dtype = dtype
+ self._repeated = repeated
+
+ def tensors_to_item(self, keys_to_tensors):
+ """See base class."""
+ image_buffer = keys_to_tensors[self._image_key]
+
+ if self._repeated:
+ return tf.map_fn(lambda x: self._decode(x),
+ image_buffer, dtype=self._dtype)
+ else:
+ return self._decode(image_buffer)
+
+ def _decode(self, image_buffer):
+ """Decodes the image buffer.
+ Args:
+ image_buffer: The tensor representing the encoded image tensor.
+ Returns:
+ A tensor that represents decoded image of self._shape, or
+ (?, ?, self._channels) if self._shape is not specified.
+ """
+ def decode_raw():
+ """Decodes a raw image."""
+ return tf.decode_raw(image_buffer, out_type=self._dtype)
+
+ image = decode_raw()
+ # image.set_shape([None, None, self._channels])
+ if self._shape is not None:
+ image = tf.reshape(image, self._shape)
+
+ return image
+
+
+def __get_dataset(dataset_config, split_name):
+ """
+ dataset_config: A dataset_config defined in datasets.py
+ split_name: 'train'/'validate'
+ """
+ with tf.name_scope('__get_dataset'):
+ if split_name not in dataset_config['SIZES']:
+ raise ValueError('split name %s not recognized' % split_name)
+
+ IMAGE_HEIGHT, IMAGE_WIDTH = dataset_config['IMAGE_HEIGHT'], dataset_config['IMAGE_WIDTH']
+ reader = tf.TFRecordReader
+ keys_to_features = {
+ 'image_a': tf.FixedLenFeature((), tf.string),
+ 'image_b': tf.FixedLenFeature((), tf.string),
+ 'flow': tf.FixedLenFeature((), tf.string),
+ }
+ items_to_handlers = {
+ 'image_a': Image(
+ image_key='image_a',
+ dtype=tf.float64,
+ shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3],
+ channels=3),
+ 'image_b': Image(
+ image_key='image_b',
+ dtype=tf.float64,
+ shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3],
+ channels=3),
+ 'flow': Image(
+ image_key='flow',
+ dtype=tf.float32,
+ shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 2],
+ channels=2),
+ }
+ decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
+ return slim.dataset.Dataset(
+ data_sources=dataset_config['PATHS'][split_name],
+ reader=reader,
+ decoder=decoder,
+ num_samples=dataset_config['SIZES'][split_name],
+ items_to_descriptions=dataset_config['ITEMS_TO_DESCRIPTIONS'])
+
+
+def config_to_arrays(dataset_config):
+ output = {
+ 'name': [],
+ 'rand_type': [],
+ 'exp': [],
+ 'mean': [],
+ 'spread': [],
+ 'prob': [],
+ 'coeff_schedule': [],
+ }
+ config = copy.deepcopy(dataset_config)
+
+ if 'coeff_schedule_param' in config:
+ del config['coeff_schedule_param']
+
+ # Get all attributes
+ for (name, value) in config.items():
+ if name == 'coeff_schedule_param':
+ output['coeff_schedule'] = [value['half_life'],
+ value['initial_coeff'],
+ value['final_coeff']]
+ else:
+ output['name'].append(name)
+ output['rand_type'].append(value['rand_type'])
+ output['exp'].append(value['exp'])
+ output['mean'].append(value['mean'])
+ output['spread'].append(value['spread'])
+ output['prob'].append(value['prob'])
+
+ return output
+
+
+# https://github.com/tgebru/transform/blob/master/src/caffe/layers/data_augmentation_layer.cpp#L34
+def _generate_coeff(param, discount_coeff=tf.constant(1.0), default_value=tf.constant(0.0)):
+ if not all(name in param for name in ['rand_type', 'exp', 'mean', 'spread', 'prob']):
+ raise RuntimeError('Expected rand_type, exp, mean, spread, prob in `param`')
+
+ rand_type = param['rand_type']
+ exp = float(param['exp'])
+ mean = tf.convert_to_tensor(param['mean'], dtype=tf.float32)
+ spread = float(param['spread']) # AKA standard deviation
+ prob = float(param['prob'])
+
+ # Multiply spread by our discount_coeff so it changes over time
+ spread = spread * discount_coeff
+
+ if rand_type == 'uniform':
+ value = tf.cond(spread > 0.0,
+ lambda: tf.random_uniform([], mean - spread, mean + spread),
+ lambda: mean)
+ if exp:
+ value = tf.exp(value)
+ elif rand_type == 'gaussian':
+ value = tf.cond(spread > 0.0,
+ lambda: tf.random_normal([], mean, spread),
+ lambda: mean)
+ if exp:
+ value = tf.exp(value)
+ elif rand_type == 'bernoulli':
+ if prob > 0.0:
+ value = tf.contrib.distributions.Bernoulli(probs=prob).sample([])
+ else:
+ value = 0.0
+ elif rand_type == 'uniform_bernoulli':
+ tmp1 = 0.0
+ tmp2 = 0
+ if prob > 0.0:
+ tmp2 = tf.contrib.distributions.Bernoulli(probs=prob).sample([])
+ else:
+ tmp2 = 0
+
+ if tmp2 == 0:
+ if default_value is not None:
+ return default_value
+ else:
+ tmp1 = tf.cond(spread > 0.0,
+ lambda: tf.random_uniform([], mean - spread, mean + spread),
+ lambda: mean)
+ if exp:
+ tmp1 = tf.exp(tmp1)
+ value = tmp1
+ elif rand_type == 'gaussian_bernoulli':
+ tmp1 = 0.0
+ tmp2 = 0
+ if prob > 0.0:
+ tmp2 = tf.contrib.distributions.Bernoulli(probs=prob).sample([])
+ else:
+ tmp2 = 0
+
+ if tmp2 == 0:
+ if default_value is not None:
+ return default_value
+ else:
+ tmp1 = tf.cond(spread > 0.0,
+ lambda: tf.random_normal([], mean, spread),
+ lambda: mean)
+ if exp:
+ tmp1 = tf.exp(tmp1)
+ value = tmp1
+ else:
+ raise ValueError('Unknown distribution type %s.' % rand_type)
+ return value
+
+
+def load_batch(dataset_config, split_name, global_step):
+ num_threads = 32
+ reader_kwargs = {'options': tf.python_io.TFRecordOptions(
+ tf.python_io.TFRecordCompressionType.ZLIB)}
+
+ with tf.name_scope('load_batch'):
+ dataset = __get_dataset(dataset_config, split_name)
+ data_provider = slim.dataset_data_provider.DatasetDataProvider(
+ dataset,
+ num_readers=num_threads,
+ common_queue_capacity=2048,
+ common_queue_min=1024,
+ reader_kwargs=reader_kwargs)
+ image_a, image_b, flow = data_provider.get(['image_a', 'image_b', 'flow'])
+ image_a, image_b, flow = map(tf.to_float, [image_a, image_b, flow])
+
+ if dataset_config['PREPROCESS']['scale']:
+ image_a = image_a / 255.0
+ image_b = image_b / 255.0
+
+ crop = [dataset_config['PREPROCESS']['crop_height'],
+ dataset_config['PREPROCESS']['crop_width']]
+ config_a = config_to_arrays(dataset_config['PREPROCESS']['image_a'])
+ config_b = config_to_arrays(dataset_config['PREPROCESS']['image_b'])
+
+ image_as, image_bs, flows = map(lambda x: tf.expand_dims(x, 0), [image_a, image_b, flow])
+
+ # Perform data augmentation (the op is pinned to the CPU here)
+ with tf.device('/cpu:0'):
+ image_as, image_bs, transforms_from_a, transforms_from_b = \
+ _preprocessing_ops.data_augmentation(image_as,
+ image_bs,
+ global_step,
+ crop,
+ config_a['name'],
+ config_a['rand_type'],
+ config_a['exp'],
+ config_a['mean'],
+ config_a['spread'],
+ config_a['prob'],
+ config_a['coeff_schedule'],
+ config_b['name'],
+ config_b['rand_type'],
+ config_b['exp'],
+ config_b['mean'],
+ config_b['spread'],
+ config_b['prob'],
+ config_b['coeff_schedule'])
+
+ noise_coeff_a = None
+ noise_coeff_b = None
+
+ # Generate and apply noise coeff for A if defined in A params
+ if 'noise' in dataset_config['PREPROCESS']['image_a']:
+ discount_coeff = tf.constant(1.0)
+ if 'coeff_schedule_param' in dataset_config['PREPROCESS']['image_a']:
+ initial_coeff = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['initial_coeff']
+ final_coeff = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['final_coeff']
+ half_life = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['half_life']
+ discount_coeff = initial_coeff + \
+ (final_coeff - initial_coeff) * \
+ (2.0 / (1.0 + tf.exp(-1.0986 * global_step / half_life)) - 1.0)
+
+ noise_coeff_a = _generate_coeff(
+ dataset_config['PREPROCESS']['image_a']['noise'], discount_coeff)
+ noise_a = tf.random_normal(shape=tf.shape(image_as),
+ mean=0.0, stddev=noise_coeff_a,
+ dtype=tf.float32)
+ image_as = tf.clip_by_value(image_as + noise_a, 0.0, 1.0)
+
+ # Generate noise coeff for B if defined in B params
+ if 'noise' in dataset_config['PREPROCESS']['image_b']:
+ discount_coeff = tf.constant(1.0)
+ if 'coeff_schedule_param' in dataset_config['PREPROCESS']['image_b']:
+ initial_coeff = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['initial_coeff']
+ final_coeff = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['final_coeff']
+ half_life = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['half_life']
+ discount_coeff = initial_coeff + \
+ (final_coeff - initial_coeff) * \
+ (2.0 / (1.0 + tf.exp(-1.0986 * global_step / half_life)) - 1.0)
+ noise_coeff_b = _generate_coeff(
+ dataset_config['PREPROCESS']['image_b']['noise'], discount_coeff)
+
+ # Combine coeff from a with coeff from b
+ if noise_coeff_a is not None:
+ if noise_coeff_b is not None:
+ noise_coeff_b = noise_coeff_a * noise_coeff_b
+ else:
+ noise_coeff_b = noise_coeff_a
+
+ # Add noise to B if needed
+ if noise_coeff_b is not None:
+ noise_b = tf.random_normal(shape=tf.shape(image_bs),
+ mean=0.0, stddev=noise_coeff_b,
+ dtype=tf.float32)
+ image_bs = tf.clip_by_value(image_bs + noise_b, 0.0, 1.0)
+
+ # Perform flow augmentation using spatial parameters from data augmentation
+ flows = _preprocessing_ops.flow_augmentation(
+ flows, transforms_from_a, transforms_from_b, crop)
+
+ return tf.train.batch([image_as, image_bs, flows],
+ enqueue_many=True,
+ batch_size=dataset_config['BATCH_SIZE'],
+ capacity=dataset_config['BATCH_SIZE'] * 4,
+ num_threads=num_threads,
+ allow_smaller_final_batch=False)
diff --git a/Codes/flownet2/src/dataset_configs.py b/Codes/flownet2/src/dataset_configs.py
new file mode 100644
index 0000000..fbda5d0
--- /dev/null
+++ b/Codes/flownet2/src/dataset_configs.py
@@ -0,0 +1,153 @@
+"""
+Add dataset configurations here. Each dataset must have the following structure:
+
+NAME = {
+ IMAGE_HEIGHT: int,
+ IMAGE_WIDTH: int,
+ ITEMS_TO_DESCRIPTIONS: {
+ 'image_a': 'A 3-channel image.',
+ 'image_b': 'A 3-channel image.',
+ 'flow': 'A 2-channel optical flow field',
+ },
+ SIZES: {
+ 'train': int,
+ 'validate': int, (optional)
+ ...
+ },
+ BATCH_SIZE: int,
+ PATHS: {
+ 'train': '',
+ 'validate': '', (optional)
+ ...
+ }
+}
+"""
+
+"""
+note that one step = one batch of data processed, ~not~ an entire epoch
+'coeff_schedule_param': {
+ 'half_life': 50000, after this many steps, the value will be i + (f - i)/2
+ 'initial_coeff': 0.5, initial value
+ 'final_coeff': 1, final value
+},
+"""
+
+FLYING_CHAIRS_DATASET_CONFIG = {
+ 'IMAGE_HEIGHT': 384,
+ 'IMAGE_WIDTH': 512,
+ 'ITEMS_TO_DESCRIPTIONS': {
+ 'image_a': 'A 3-channel image.',
+ 'image_b': 'A 3-channel image.',
+ 'flow': 'A 2-channel optical flow field',
+ },
+ 'SIZES': {
+ 'train': 22232,
+ 'validate': 640,
+ 'sample': 8,
+ },
+ 'BATCH_SIZE': 8,
+ 'PATHS': {
+ 'train': './data/tfrecords/fc_train.tfrecords',
+ 'validate': './data/tfrecords/fc_val.tfrecords',
+ 'sample': './data/tfrecords/fc_sample.tfrecords',
+ },
+ 'PREPROCESS': {
+ 'scale': False,
+ 'crop_height': 320,
+ 'crop_width': 448,
+ 'image_a': {
+ 'translate': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.4,
+ 'prob': 1.0,
+ },
+ 'rotate': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.4,
+ 'prob': 1.0,
+ },
+ 'zoom': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': True,
+ 'mean': 0.2,
+ 'spread': 0.4,
+ 'prob': 1.0,
+ },
+ 'squeeze': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.3,
+ 'prob': 1.0,
+ },
+ 'noise': {
+ 'rand_type': "uniform_bernoulli",
+ 'exp': False,
+ 'mean': 0.03,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ },
+ # All preprocessing to image A will be applied to image B in addition to the following.
+ 'image_b': {
+ 'translate': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ 'rotate': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ 'zoom': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.03,
+ 'prob': 1.0,
+ },
+ 'gamma': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'brightness': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': False,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'contrast': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'color': {
+ 'rand_type': "gaussian_bernoulli",
+ 'exp': True,
+ 'mean': 0,
+ 'spread': 0.02,
+ 'prob': 1.0,
+ },
+ 'coeff_schedule_param': {
+ 'half_life': 50000,
+ 'initial_coeff': 0.5,
+ 'final_coeff': 1,
+ },
+ }
+ },
+}
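The `coeff_schedule_param` block above (half_life, initial_coeff, final_coeff) controls how strongly the extra image_b augmentation is applied as training progresses: dataloader.py maps the global step to a discount coefficient via initial + (final - initial) * (2 / (1 + exp(-1.0986 * step / half_life)) - 1). Below is a small sketch of that schedule evaluated with plain floats (the real code works on tensors).

```
# Sketch of the coeff_schedule curve used in dataloader.load_batch().
import math

def discount_coeff(step, half_life=50000, initial_coeff=0.5, final_coeff=1.0):
    return initial_coeff + (final_coeff - initial_coeff) * (
        2.0 / (1.0 + math.exp(-1.0986 * step / half_life)) - 1.0)

for step in (0, 50000, 200000):
    print(step, round(discount_coeff(step), 3))
# 0 -> 0.5 (initial value), 50000 -> 0.75 (halfway to final), 200000 -> ~0.99
```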
diff --git a/Codes/flownet2/src/downsample.py b/Codes/flownet2/src/downsample.py
new file mode 100644
index 0000000..5e6fc95
--- /dev/null
+++ b/Codes/flownet2/src/downsample.py
@@ -0,0 +1,8 @@
+import tensorflow as tf
+
+_downsample = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/downsample.so"))
+
+
+def downsample(tensor, size):
+ return _downsample.downsample(tensor, size)
diff --git a/Codes/flownet2/src/flow_warp.py b/Codes/flownet2/src/flow_warp.py
new file mode 100644
index 0000000..fe5fd4d
--- /dev/null
+++ b/Codes/flownet2/src/flow_warp.py
@@ -0,0 +1,15 @@
+import tensorflow as tf
+
+_flow_warp_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./ops/build/flow_warp.so"))
+
+
+def flow_warp(image, flow):
+ return _flow_warp_ops.flow_warp(image, flow)
+
+
+@tf.RegisterGradient("FlowWarp")
+def _flow_warp_grad(flow_warp_op, gradients):
+ return _flow_warp_ops.flow_warp_grad(flow_warp_op.inputs[0],
+ flow_warp_op.inputs[1],
+ gradients)
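`flow_warp()` follows the same pattern as the other op wrappers: it loads `ops/build/flow_warp.so` and registers a gradient so the warp can sit inside a trainable graph. A hedged usage sketch: the library must have been built with `make flowwarp`, the shapes are illustrative, and the warping-direction convention is an assumption rather than something stated in this file.

```
# Hypothetical usage of flow_warp; requires ops/build/flow_warp.so.
import tensorflow as tf
from src.flow_warp import flow_warp

image = tf.placeholder(tf.float32, [1, 384, 512, 3])   # image to be warped
flow = tf.placeholder(tf.float32, [1, 384, 512, 2])    # per-pixel (dx, dy) flow

warped = flow_warp(image, flow)
# The registered "FlowWarp" gradient lets the warp participate in backprop.
loss_grads = tf.gradients(tf.reduce_mean(warped), [image, flow])
```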
diff --git a/Codes/flownet2/src/flowlib.py b/Codes/flownet2/src/flowlib.py
new file mode 100644
index 0000000..36c56d4
--- /dev/null
+++ b/Codes/flownet2/src/flowlib.py
@@ -0,0 +1,554 @@
+#!/usr/bin/python
+"""
+# ==============================
+# flowlib.py
+# library for optical flow processing
+# Author: Ruoteng Li
+# Date: 6th Aug 2016
+# ==============================
+"""
+import png
+import numpy as np
+import matplotlib.colors as cl
+import matplotlib.pyplot as plt
+from PIL import Image
+import tensorflow as tf
+
+
+UNKNOWN_FLOW_THRESH = 1e7
+SMALLFLOW = 0.0
+LARGEFLOW = 1e8
+
+"""
+=============
+Flow Section
+=============
+"""
+
+
+def show_flow(filename):
+ """
+ visualize optical flow map using matplotlib
+ :param filename: optical flow file
+ :return: None
+ """
+ flow = read_flow(filename)
+ img = flow_to_image(flow)
+ plt.imshow(img)
+ plt.show()
+
+
+def visualize_flow(flow, mode='Y'):
+ """
+ this function visualizes the input flow
+ :param flow: input flow as an array
+ :param mode: choose which color mode to visualize the flow (Y: YCbCr, RGB: RGB color)
+ :return: None
+ """
+ if mode == 'Y':
+ # YCbCr color wheel
+ img = flow_to_image(flow)
+ plt.imshow(img)
+ plt.show()
+ elif mode == 'RGB':
+ (h, w) = flow.shape[0:2]
+ du = flow[:, :, 0]
+ dv = flow[:, :, 1]
+ valid = flow[:, :, 2]
+ max_flow = max(np.max(du), np.max(dv))
+ img = np.zeros((h, w, 3), dtype=np.float64)
+ # angle layer
+ img[:, :, 0] = np.arctan2(dv, du) / (2 * np.pi)
+ # magnitude layer, normalized to 1
+ img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow
+ # phase layer
+ img[:, :, 2] = 8 - img[:, :, 1]
+ # clip to [0,1]
+ small_idx = img[:, :, 0:3] < 0
+ large_idx = img[:, :, 0:3] > 1
+ img[small_idx] = 0
+ img[large_idx] = 1
+ # convert to rgb
+ img = cl.hsv_to_rgb(img)
+ # remove invalid point
+ img[:, :, 0] = img[:, :, 0] * valid
+ img[:, :, 1] = img[:, :, 1] * valid
+ img[:, :, 2] = img[:, :, 2] * valid
+ # show
+ plt.imshow(img)
+ plt.show()
+
+ return None
+
+
+def read_flow(filename):
+ """
+ read optical flow from Middlebury .flo file
+ :param filename: name of the flow file
+ :return: optical flow data in matrix
+ """
+ f = open(filename, 'rb')
+ magic = np.fromfile(f, np.float32, count=1)
+ data2d = None
+
+ if 202021.25 != magic:
+ print('Magic number incorrect. Invalid .flo file')
+ else:
+ w = np.fromfile(f, np.int32, count=1)
+ h = np.fromfile(f, np.int32, count=1)
+ print("Reading %d x %d flo file" % (h, w))
+ data2d = np.fromfile(f, np.float32, count=2 * w * h)
+ # reshape data into 3D array (columns, rows, channels)
+ data2d = np.resize(data2d, (h[0], w[0], 2))
+ f.close()
+ return data2d
+
+
+def read_flow_png(flow_file):
+ """
+ Read optical flow from KITTI .png file
+ :param flow_file: name of the flow file
+ :return: optical flow data in matrix
+ """
+ flow_object = png.Reader(filename=flow_file)
+ flow_direct = flow_object.asDirect()
+ flow_data = list(flow_direct[2])
+ (w, h) = flow_direct[3]['size']
+ flow = np.zeros((h, w, 3), dtype=np.float64)
+ for i in range(len(flow_data)):
+ flow[i, :, 0] = flow_data[i][0::3]
+ flow[i, :, 1] = flow_data[i][1::3]
+ flow[i, :, 2] = flow_data[i][2::3]
+
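+    # KITTI encodes flow in 16-bit PNGs: pixel value = flow * 64 + 2**15,
+    # with the third channel acting as the validity mask.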
+ invalid_idx = (flow[:, :, 2] == 0)
+ flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0
+ flow[invalid_idx, 0] = 0
+ flow[invalid_idx, 1] = 0
+ return flow
+
+
+def write_flow(flow, filename):
+ """
+ write optical flow in Middlebury .flo format
+ :param flow: optical flow map
+ :param filename: optical flow file path to be saved
+ :return: None
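+    Example (hypothetical file names):
+        flow = read_flow('frame_0001.flo')
+        write_flow(flow, 'frame_0001_copy.flo')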
+ """
+ f = open(filename, 'wb')
+ magic = np.array([202021.25], dtype=np.float32)
+ (height, width) = flow.shape[0:2]
+ w = np.array([width], dtype=np.int32)
+ h = np.array([height], dtype=np.int32)
+ magic.tofile(f)
+ w.tofile(f)
+ h.tofile(f)
+ flow.tofile(f)
+ f.close()
+
+
+def segment_flow(flow):
+ h = flow.shape[0]
+ w = flow.shape[1]
+ u = flow[:, :, 0]
+ v = flow[:, :, 1]
+
+ idx = ((abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW))
+ idx2 = (abs(u) == SMALLFLOW)
+ class0 = (v == 0) & (u == 0)
+ u[idx2] = 0.00001
+ tan_value = v / u
+
+ class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0)
+ class2 = (tan_value >= 1) & (u >= 0) & (v >= 0)
+ class3 = (tan_value < -1) & (u <= 0) & (v >= 0)
+ class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0)
+ class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0)
+ class7 = (tan_value < -1) & (u >= 0) & (v <= 0)
+ class6 = (tan_value >= 1) & (u <= 0) & (v <= 0)
+ class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0)
+
+ seg = np.zeros((h, w))
+
+ seg[class1] = 1
+ seg[class2] = 2
+ seg[class3] = 3
+ seg[class4] = 4
+ seg[class5] = 5
+ seg[class6] = 6
+ seg[class7] = 7
+ seg[class8] = 8
+ seg[class0] = 0
+ seg[idx] = 0
+
+ return seg
+
+
+def flow_error(tu, tv, u, v):
+ """
+ Calculate average end point error
+ :param tu: ground-truth horizontal flow map
+ :param tv: ground-truth vertical flow map
+ :param u: estimated horizontal flow map
+ :param v: estimated vertical flow map
+ :return: End point error of the estimated flow
+ """
+ smallflow = 0.0
+ '''
+ stu = tu[bord+1:end-bord,bord+1:end-bord]
+ stv = tv[bord+1:end-bord,bord+1:end-bord]
+ su = u[bord+1:end-bord,bord+1:end-bord]
+ sv = v[bord+1:end-bord,bord+1:end-bord]
+ '''
+ stu = tu[:]
+ stv = tv[:]
+ su = u[:]
+ sv = v[:]
+
+ idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH)
+ stu[idxUnknow] = 0
+ stv[idxUnknow] = 0
+ su[idxUnknow] = 0
+ sv[idxUnknow] = 0
+
+    ind2 = (np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)
+ index_su = su[ind2]
+ index_sv = sv[ind2]
+ an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1)
+ un = index_su * an
+ vn = index_sv * an
+
+ index_stu = stu[ind2]
+ index_stv = stv[ind2]
+ tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1)
+ tun = index_stu * tn
+ tvn = index_stv * tn
+
+ '''
+ angle = un * tun + vn * tvn + (an * tn)
+ index = [angle == 1.0]
+ angle[index] = 0.999
+ ang = np.arccos(angle)
+ mang = np.mean(ang)
+ mang = mang * 180 / np.pi
+ '''
+
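+    # End-point error: per-pixel Euclidean distance between ground-truth and
+    # estimated flow vectors, averaged over the valid (non-small, known) pixels.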
+ epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)
+ epe = epe[ind2]
+ mepe = np.mean(epe)
+ return mepe
+
+
+def flow_to_image(flow):
+ """
+ Convert flow into middlebury color code image
+ :param flow: optical flow map
+ :return: optical flow image in middlebury color
+ """
+ u = flow[:, :, 0]
+ v = flow[:, :, 1]
+
+ maxu = -999.
+ maxv = -999.
+ minu = 999.
+ minv = 999.
+
+ idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
+ u[idxUnknow] = 0
+ v[idxUnknow] = 0
+
+ maxu = max(maxu, np.max(u))
+ minu = min(minu, np.min(u))
+
+ maxv = max(maxv, np.max(v))
+ minv = min(minv, np.min(v))
+
+ rad = np.sqrt(u ** 2 + v ** 2)
+ maxrad = max(-1, np.max(rad))
+
+ # print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu,maxu, minv, maxv))
+
+ u = u/(maxrad + np.finfo(float).eps)
+ v = v/(maxrad + np.finfo(float).eps)
+
+ img = compute_color(u, v)
+
+ idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
+ img[idx] = 0
+
+ return np.uint8(img)
+
+
+def tf_flow_to_image(flow):
+ """
+ Convert flow into middlebury color code image
+ :param flow: optical flow map
+ :return: optical flow image in middlebury color
+ """
+ u = flow[:, :, :, 0]
+ v = flow[:, :, :, 1]
+
+ maxu = tf.constant(-999.)
+ maxv = tf.constant(-999.)
+ minu = tf.constant(999.)
+ minv = tf.constant(999.)
+
+ zeros = tf.zeros_like(u, dtype=tf.float32)
+    u = tf.where(tf.greater(tf.abs(u), UNKNOWN_FLOW_THRESH), zeros, u)
+    v = tf.where(tf.greater(tf.abs(v), UNKNOWN_FLOW_THRESH), zeros, v)
+
+ rad = tf.sqrt(u ** 2 + v ** 2)
+    maxrad = tf.maximum(-1.0, tf.reduce_max(rad))
+
+ # print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu, maxu, minv, maxv))
+
+ u = u / (maxrad + np.finfo(float).eps)
+ v = v / (maxrad + np.finfo(float).eps)
+
+ img = compute_color(u, v)
+
+ # idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
+ # img[idx] = 0
+
+ return np.uint8(img)
+
+
+def evaluate_flow_file(gt, pred):
+ """
+ evaluate the estimated optical flow end point error according to ground truth provided
+ :param gt: ground truth file path
+ :param pred: estimated optical flow file path
+ :return: end point error, float32
+ """
+ # Read flow files and calculate the errors
+ gt_flow = read_flow(gt) # ground truth flow
+ eva_flow = read_flow(pred) # predicted flow
+ # Calculate errors
+ average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1])
+ return average_pe
+
+
+def evaluate_flow(gt_flow, pred_flow):
+ """
+ gt: ground-truth flow
+ pred: estimated flow
+ """
+ average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], pred_flow[:, :, 0], pred_flow[:, :, 1])
+ return average_pe
+
+
+"""
+==============
+Disparity Section
+==============
+"""
+
+
+def read_disp_png(file_name):
+ """
+ Read optical flow from KITTI .png file
+ :param file_name: name of the flow file
+ :return: optical flow data in matrix
+ """
+ image_object = png.Reader(filename=file_name)
+ image_direct = image_object.asDirect()
+ image_data = list(image_direct[2])
+ (w, h) = image_direct[3]['size']
+    channel = len(image_data[0]) // w
+ flow = np.zeros((h, w, channel), dtype=np.uint16)
+ for i in range(len(image_data)):
+ for j in range(channel):
+ flow[i, :, j] = image_data[i][j::channel]
+ return flow[:, :, 0] / 256
+
+
+def disp_to_flowfile(disp, filename):
+ """
+ Read KITTI disparity file in png format
+ :param disp: disparity matrix
+ :param filename: the flow file name to save
+ :return: None
+ """
+ f = open(filename, 'wb')
+ magic = np.array([202021.25], dtype=np.float32)
+ (height, width) = disp.shape[0:2]
+ w = np.array([width], dtype=np.int32)
+ h = np.array([height], dtype=np.int32)
+ empty_map = np.zeros((height, width), dtype=np.float32)
+ data = np.dstack((disp, empty_map))
+ magic.tofile(f)
+ w.tofile(f)
+ h.tofile(f)
+ data.tofile(f)
+ f.close()
+
+
+"""
+==============
+Image Section
+==============
+"""
+
+
+def read_image(filename):
+ """
+ Read normal image of any format
+ :param filename: name of the image file
+ :return: image data in matrix uint8 type
+ """
+ img = Image.open(filename)
+ im = np.array(img)
+ return im
+
+
+def warp_image(im, flow):
+ """
+ Use optical flow to warp image to the next
+ :param im: image to warp
+ :param flow: optical flow
+ :return: warped image
+ """
+ from scipy import interpolate
+ image_height = im.shape[0]
+ image_width = im.shape[1]
+ flow_height = flow.shape[0]
+ flow_width = flow.shape[1]
+ n = image_height * image_width
+ (iy, ix) = np.mgrid[0:image_height, 0:image_width]
+ (fy, fx) = np.mgrid[0:flow_height, 0:flow_width]
+    fx = fx.astype(np.float64) + flow[:, :, 0]
+    fy = fy.astype(np.float64) + flow[:, :, 1]
+    mask = np.logical_or(fx < 0, fx > flow_width)
+ mask = np.logical_or(mask, fy < 0)
+ mask = np.logical_or(mask, fy > flow_height)
+ fx = np.minimum(np.maximum(fx, 0), flow_width)
+ fy = np.minimum(np.maximum(fy, 0), flow_height)
+ points = np.concatenate((ix.reshape(n,1), iy.reshape(n,1)), axis=1)
+ xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n,1)), axis=1)
+ warp = np.zeros((image_height, image_width, im.shape[2]))
+ for i in range(im.shape[2]):
+ channel = im[:, :, i]
+ values = channel.reshape(n, 1)
+ new_channel = interpolate.griddata(points, values, xi, method='cubic')
+ new_channel = np.reshape(new_channel, [flow_height, flow_width])
+ new_channel[mask] = 1
+ warp[:, :, i] = new_channel.astype(np.uint8)
+
+ return warp.astype(np.uint8)
+
+
+"""
+==============
+Others
+==============
+"""
+
+
+def scale_image(image, new_range):
+ """
+ Linearly scale the image into desired range
+ :param image: input image
+ :param new_range: the new range to be aligned
+ :return: image normalized in new range
+ """
+ min_val = np.min(image).astype(np.float32)
+ max_val = np.max(image).astype(np.float32)
+ min_val_new = np.array(min(new_range), dtype=np.float32)
+ max_val_new = np.array(max(new_range), dtype=np.float32)
+ scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new
+ return scaled_image.astype(np.uint8)
+
+
+def compute_color(u, v):
+ """
+ compute optical flow color map
+ :param u: optical flow horizontal map
+ :param v: optical flow vertical map
+ :return: optical flow in color code
+ """
+ [h, w] = u.shape
+ img = np.zeros([h, w, 3])
+ nanIdx = np.isnan(u) | np.isnan(v)
+ u[nanIdx] = 0
+ v[nanIdx] = 0
+
+ colorwheel = make_color_wheel()
+ # ncols = np.size(colorwheel, 0)
+ ncols = colorwheel.shape[0]
+
+ rad = np.sqrt(u**2+v**2)
+
+ a = np.arctan2(-v, -u) / np.pi
+
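+    # Map the flow angle (arctan2 result divided by pi, i.e. in [-1, 1]) to a
+    # fractional position fk on the color wheel, then interpolate between the
+    # two nearest wheel entries below.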
+ fk = (a+1) / 2 * (ncols - 1) + 1
+
+ k0 = np.floor(fk).astype(int)
+
+ k1 = k0 + 1
+ k1[k1 == ncols+1] = 1
+ f = fk - k0
+
+ for i in range(0, np.size(colorwheel, 1)):
+ tmp = colorwheel[:, i]
+ col0 = tmp[k0-1] / 255
+ col1 = tmp[k1-1] / 255
+ col = (1-f) * col0 + f * col1
+
+ idx = rad <= 1
+ col[idx] = 1-rad[idx]*(1-col[idx])
+ notidx = np.logical_not(idx)
+
+ col[notidx] *= 0.75
+ img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))
+
+ return img
+
+
+def make_color_wheel():
+ """
+ Generate color wheel according Middlebury color code
+ :return: Color wheel
+ """
+ RY = 15
+ YG = 6
+ GC = 4
+ CB = 11
+ BM = 13
+ MR = 6
+
+ ncols = RY + YG + GC + CB + BM + MR
+
+ colorwheel = np.zeros([ncols, 3])
+
+ col = 0
+
+ # RY
+ colorwheel[0:RY, 0] = 255
+ colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY))
+ col += RY
+
+ # YG
+ colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG))
+ colorwheel[col:col+YG, 1] = 255
+ col += YG
+
+ # GC
+ colorwheel[col:col+GC, 1] = 255
+ colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC))
+ col += GC
+
+ # CB
+ colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB))
+ colorwheel[col:col+CB, 2] = 255
+ col += CB
+
+ # BM
+ colorwheel[col:col+BM, 2] = 255
+ colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM))
+    col += BM
+
+ # MR
+ colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
+ colorwheel[col:col+MR, 0] = 255
+
+ return colorwheel
diff --git a/Codes/flownet2/src/flownet2/__init__.py b/Codes/flownet2/src/flownet2/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/__init__.py
diff --git a/Codes/flownet2/src/flownet2/flownet2.py b/Codes/flownet2/src/flownet2/flownet2.py
new file mode 100644
index 0000000..d44ed10
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/flownet2.py
@@ -0,0 +1,118 @@
+from ..net import Net, Mode
+from ..flownet_css.flownet_css import FlowNetCSS
+from ..flownet_sd.flownet_sd import FlowNetSD
+from ..flow_warp import flow_warp
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+from ..downsample import downsample
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNet2(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.net_css = FlowNetCSS(mode, debug)
+ self.net_sd = FlowNetSD(mode, debug)
+ super(FlowNet2, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ with tf.variable_scope('FlowNet2'):
+            # Forward pass through FlowNetCSS and FlowNetSD (trainable=True here, so their weights are not frozen)
+ net_css_predictions = self.net_css.model(inputs, training_schedule, trainable=True)
+ net_sd_predictions = self.net_sd.model(inputs, training_schedule, trainable=True)
+
+ def ChannelNorm(tensor):
+ sq = tf.square(tensor)
+ r_sum = tf.reduce_sum(sq, keep_dims=True, axis=3)
+ return tf.sqrt(r_sum)
+
+ sd_flow_norm = ChannelNorm(net_sd_predictions['flow'])
+ css_flow_norm = ChannelNorm(net_css_predictions['flow'])
+
+ flow_warp_sd = flow_warp(inputs['input_b'], net_sd_predictions['flow'])
+ img_diff_sd = inputs['input_a'] - flow_warp_sd
+ img_diff_sd_norm = ChannelNorm(img_diff_sd)
+
+ flow_warp_css = flow_warp(inputs['input_b'], net_css_predictions['flow'])
+ img_diff_css = inputs['input_a'] - flow_warp_css
+ img_diff_css_norm = ChannelNorm(img_diff_css)
+
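+            # Fusion input: image A (3) + SD flow (2) + CSS flow (2) + the two flow
+            # magnitudes (1 + 1) + the two warping-error magnitudes (1 + 1) = 11 channels.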
+ input_to_fusion = tf.concat([inputs['input_a'],
+ net_sd_predictions['flow'],
+ net_css_predictions['flow'],
+ sd_flow_norm,
+ css_flow_norm,
+ img_diff_sd_norm,
+ img_diff_css_norm], axis=3)
+
+ # Fusion Network
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ fuse_conv0 = slim.conv2d(pad(input_to_fusion), 64, 3, scope='fuse_conv0')
+ fuse_conv1 = slim.conv2d(pad(fuse_conv0), 64, 3, stride=2, scope='fuse_conv1')
+ fuse_conv1_1 = slim.conv2d(pad(fuse_conv1), 128, 3, scope='fuse_conv1_1')
+ fuse_conv2 = slim.conv2d(pad(fuse_conv1_1), 128, 3,
+ stride=2, scope='fuse_conv2')
+ fuse_conv2_1 = slim.conv2d(pad(fuse_conv2), 128, 3, scope='fuse_conv2_1')
+
+ predict_flow2 = slim.conv2d(pad(fuse_conv2_1), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ fuse_deconv1 = antipad(slim.conv2d_transpose(fuse_conv2_1, 32, 4,
+ stride=2,
+ scope='fuse_deconv1'))
+ fuse_upsample_flow2to1 = antipad(slim.conv2d_transpose(predict_flow2, 2, 4,
+ stride=2,
+ scope='fuse_upsample_flow2to1',
+ activation_fn=None))
+ concat1 = tf.concat([fuse_conv1_1, fuse_deconv1,
+ fuse_upsample_flow2to1], axis=3)
+ fuse_interconv1 = slim.conv2d(pad(concat1), 32, 3,
+ activation_fn=None, scope='fuse_interconv1')
+
+ predict_flow1 = slim.conv2d(pad(fuse_interconv1), 2, 3,
+ scope='predict_flow1',
+ activation_fn=None)
+ fuse_deconv0 = antipad(slim.conv2d_transpose(concat1, 16, 4,
+ stride=2,
+ scope='fuse_deconv0'))
+ fuse_upsample_flow1to0 = antipad(slim.conv2d_transpose(predict_flow1, 2, 4,
+ stride=2,
+ scope='fuse_upsample_flow1to0',
+ activation_fn=None))
+ concat0 = tf.concat([fuse_conv0, fuse_deconv0, fuse_upsample_flow1to0], axis=3)
+ fuse_interconv0 = slim.conv2d(pad(concat0), 16, 3,
+ activation_fn=None, scope='fuse_interconv0')
+
+ predict_flow0 = slim.conv2d(pad(fuse_interconv0), 2,
+ 3, activation_fn=None, scope='predict_flow0')
+
+ flow = tf.image.resize_bilinear(
+ predict_flow0, tf.stack([height, width]), align_corners=True)
+ return {
+ 'predict_flow0': predict_flow0,
+ 'flow': flow,
+ }
+
+ def loss(self, flow, predictions):
+        # Endpoint-error (L2) loss between predict_flow0 and the downsampled true flow
+ predict_flow0 = predictions['predict_flow0']
+ size = [predict_flow0.shape[1], predict_flow0.shape[2]]
+ downsampled_flow0 = downsample(flow, size)
+ loss = average_endpoint_error(downsampled_flow0, predict_flow0)
+ tf.losses.add_loss(loss)
+
+ # Return the 'total' loss: loss fns + regularization terms defined in the model
+ return tf.losses.get_total_loss()
diff --git a/Codes/flownet2/src/flownet2/test.py b/Codes/flownet2/src/flownet2/test.py
new file mode 100644
index 0000000..3177614
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet2 import FlowNet2
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNet2(mode=Mode.TEST)
+
+    # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNet2/flownet-2.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet2/train.py b/Codes/flownet2/src/flownet2/train.py
new file mode 100644
index 0000000..40c028d
--- /dev/null
+++ b/Codes/flownet2/src/flownet2/train.py
@@ -0,0 +1,24 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet2 import FlowNet2
+
+# Create a new network
+net = FlowNet2()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_2',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow,
+ # Load trained weights for CSS and SD parts of network
+ checkpoints={
+ './checkpoints/FlowNetCSS-ft-sd/flownet-CSS-ft-sd.ckpt-0': ('FlowNet2/FlowNetCSS', 'FlowNet2'),
+ './checkpoints/FlowNetSD/flownet-SD.ckpt-0': ('FlowNet2/FlowNetSD', 'FlowNet2')
+ }
+)
diff --git a/Codes/flownet2/src/flownet_c/__init__.py b/Codes/flownet2/src/flownet_c/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/__init__.py
diff --git a/Codes/flownet2/src/flownet_c/flownet_c.py b/Codes/flownet2/src/flownet_c/flownet_c.py
new file mode 100644
index 0000000..d333ee2
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/flownet_c.py
@@ -0,0 +1,167 @@
+from ..net import Net, Mode
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+from ..correlation import correlation
+from ..downsample import downsample
+import math
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNetC(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ super(FlowNetC, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ with tf.variable_scope('FlowNetC'):
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ with slim.arg_scope([slim.conv2d], stride=2):
+ conv_a_1 = slim.conv2d(pad(inputs['input_a'], 3), 64, 7, scope='conv1')
+ conv_a_2 = slim.conv2d(pad(conv_a_1, 2), 128, 5, scope='conv2')
+ conv_a_3 = slim.conv2d(pad(conv_a_2, 2), 256, 5, scope='conv3')
+
+ conv_b_1 = slim.conv2d(pad(inputs['input_b'], 3),
+ 64, 7, scope='conv1', reuse=True)
+ conv_b_2 = slim.conv2d(pad(conv_b_1, 2), 128, 5, scope='conv2', reuse=True)
+ conv_b_3 = slim.conv2d(pad(conv_b_2, 2), 256, 5, scope='conv3', reuse=True)
+
+ # Compute cross correlation with leaky relu activation
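+                        # Positional args are presumably (kernel_size=1, max_displacement=20,
+                        # stride_1=1, stride_2=2, pad=20), mirroring the original Caffe FlowNetC settings.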
+ cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20)
+ cc_relu = LeakyReLU(cc)
+
+ # Combine cross correlation results with convolution of feature map A
+ netA_conv = slim.conv2d(conv_a_3, 32, 1, scope='conv_redir')
+ # Concatenate along the channels axis
+ net = tf.concat([netA_conv, cc_relu], axis=3)
+
+ conv3_1 = slim.conv2d(pad(net), 256, 3, scope='conv3_1')
+ with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
+ conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
+ conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
+ conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
+ conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
+ conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
+ conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
+
+ """ START: Refinement Network """
+ with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
+ predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
+ scope='predict_flow6',
+ activation_fn=None)
+
+ deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
+ stride=2,
+ scope='deconv5'))
+ upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
+ stride=2,
+ scope='upsample_flow6to5',
+ activation_fn=None))
+ concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
+
+ predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
+ scope='predict_flow5',
+ activation_fn=None)
+ deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
+ stride=2,
+ scope='deconv4'))
+ upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
+ stride=2,
+ scope='upsample_flow5to4',
+ activation_fn=None))
+ concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
+
+ predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
+ scope='predict_flow4',
+ activation_fn=None)
+ deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
+ stride=2,
+ scope='deconv3'))
+ upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
+ stride=2,
+ scope='upsample_flow4to3',
+ activation_fn=None))
+ concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
+
+ predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
+ scope='predict_flow3',
+ activation_fn=None)
+ deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
+ stride=2,
+ scope='deconv2'))
+ upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
+ stride=2,
+ scope='upsample_flow3to2',
+ activation_fn=None))
+ concat2 = tf.concat([conv_a_2, deconv2, upsample_flow3to2], axis=3)
+
+ predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ """ END: Refinement Network """
+
+ flow = predict_flow2 * 20.0
+ # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
+ flow = tf.image.resize_bilinear(flow,
+ tf.stack([height, width]),
+ align_corners=True)
+
+ return {
+ 'predict_flow6': predict_flow6,
+ 'predict_flow5': predict_flow5,
+ 'predict_flow4': predict_flow4,
+ 'predict_flow3': predict_flow3,
+ 'predict_flow2': predict_flow2,
+ 'flow': flow,
+ }
+
+ def loss(self, flow, predictions):
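+        # Scale ground-truth flow by 1/20 to match the network's internal flow
+        # scale (the model output multiplies predict_flow2 by 20).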
+ flow = flow * 0.05
+
+ losses = []
+ INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
+
+ # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
+ predict_flow6 = predictions['predict_flow6']
+ size = [predict_flow6.shape[1], predict_flow6.shape[2]]
+ downsampled_flow6 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
+
+ # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
+ predict_flow5 = predictions['predict_flow5']
+ size = [predict_flow5.shape[1], predict_flow5.shape[2]]
+ downsampled_flow5 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
+
+ # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
+ predict_flow4 = predictions['predict_flow4']
+ size = [predict_flow4.shape[1], predict_flow4.shape[2]]
+ downsampled_flow4 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
+
+ # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
+ predict_flow3 = predictions['predict_flow3']
+ size = [predict_flow3.shape[1], predict_flow3.shape[2]]
+ downsampled_flow3 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
+
+ # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
+ predict_flow2 = predictions['predict_flow2']
+ size = [predict_flow2.shape[1], predict_flow2.shape[2]]
+ downsampled_flow2 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
+
+ loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
+
+ # Return the 'total' loss: loss fns + regularization terms defined in the model
+ return tf.losses.get_total_loss()
diff --git a/Codes/flownet2/src/flownet_c/test.py b/Codes/flownet2/src/flownet_c/test.py
new file mode 100644
index 0000000..692f22d
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_c import FlowNetC
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetC(mode=Mode.TEST)
+
+    # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetC/flownet-C.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_c/train.py b/Codes/flownet2/src/flownet_c/train.py
new file mode 100644
index 0000000..9296ac7
--- /dev/null
+++ b/Codes/flownet2/src/flownet_c/train.py
@@ -0,0 +1,19 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_c import FlowNetC
+
+# Create a new network
+net = FlowNetC()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_c',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow
+)
diff --git a/Codes/flownet2/src/flownet_cs/__init__.py b/Codes/flownet2/src/flownet_cs/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/__init__.py
diff --git a/Codes/flownet2/src/flownet_cs/flownet_cs.py b/Codes/flownet2/src/flownet_cs/flownet_cs.py
new file mode 100644
index 0000000..aeaea47
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/flownet_cs.py
@@ -0,0 +1,41 @@
+from ..net import Net, Mode
+from ..flownet_c.flownet_c import FlowNetC
+from ..flownet_s.flownet_s import FlowNetS
+from ..flow_warp import flow_warp
+import tensorflow as tf
+
+
+class FlowNetCS(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.net_c = FlowNetC(mode, debug)
+ self.net_s = FlowNetS(mode, debug)
+ super(FlowNetCS, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ with tf.variable_scope('FlowNetCS'):
+            # Forward pass through FlowNetC (trainable=True here, so its weights are not frozen)
+ net_c_predictions = self.net_c.model(inputs, training_schedule, trainable=True)
+
+ # Perform flow warping (to move image B closer to image A based on flow prediction)
+ warped = flow_warp(inputs['input_b'], net_c_predictions['flow'])
+
+ # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels)
+ brightness_error = inputs['input_a'] - warped
+ brightness_error = tf.square(brightness_error)
+ brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3)
+ brightness_error = tf.sqrt(brightness_error)
+
+ # Gather all inputs to FlowNetS
+ inputs_to_s = {
+ 'input_a': inputs['input_a'],
+ 'input_b': inputs['input_b'],
+ 'warped': warped,
+ 'flow': net_c_predictions['flow'] * 0.05,
+ 'brightness_error': brightness_error,
+ }
+
+ return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable)
+
+ def loss(self, flow, predictions):
+ return self.net_s.loss(flow, predictions)
diff --git a/Codes/flownet2/src/flownet_cs/test.py b/Codes/flownet2/src/flownet_cs/test.py
new file mode 100644
index 0000000..ae00ff4
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_cs import FlowNetCS
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetCS(mode=Mode.TEST)
+
+    # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetCS/flownet-CS.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_cs/train.py b/Codes/flownet2/src/flownet_cs/train.py
new file mode 100644
index 0000000..9376132
--- /dev/null
+++ b/Codes/flownet2/src/flownet_cs/train.py
@@ -0,0 +1,21 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_cs import FlowNetCS
+
+# Create a new network
+net = FlowNetCS()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_cs',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow,
+ # Load trained weights for C part of network
+ checkpoints={'./checkpoints/FlowNetC/flownet-C.ckpt-0': ('FlowNetCS/FlowNetC', 'FlowNetCS')}
+)
diff --git a/Codes/flownet2/src/flownet_css/__init__.py b/Codes/flownet2/src/flownet_css/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/__init__.py
diff --git a/Codes/flownet2/src/flownet_css/flownet_css.py b/Codes/flownet2/src/flownet_css/flownet_css.py
new file mode 100644
index 0000000..93d9db2
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/flownet_css.py
@@ -0,0 +1,41 @@
+from ..net import Net, Mode
+from ..flownet_cs.flownet_cs import FlowNetCS
+from ..flownet_s.flownet_s import FlowNetS
+from ..flow_warp import flow_warp
+import tensorflow as tf
+
+
+class FlowNetCSS(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.net_cs = FlowNetCS(mode, debug)
+ self.net_s = FlowNetS(mode, debug)
+ super(FlowNetCSS, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ with tf.variable_scope('FlowNetCSS'):
+            # Forward pass through FlowNetCS (trainable=True here, so its weights are not frozen)
+ net_cs_predictions = self.net_cs.model(inputs, training_schedule, trainable=True)
+
+ # Perform flow warping (to move image B closer to image A based on flow prediction)
+ warped = flow_warp(inputs['input_b'], net_cs_predictions['flow'])
+
+ # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels)
+ brightness_error = inputs['input_a'] - warped
+ brightness_error = tf.square(brightness_error)
+ brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3)
+ brightness_error = tf.sqrt(brightness_error)
+
+ # Gather all inputs to FlowNetS
+ inputs_to_s = {
+ 'input_a': inputs['input_a'],
+ 'input_b': inputs['input_b'],
+ 'warped': warped,
+ 'flow': net_cs_predictions['flow'] * 0.05,
+ 'brightness_error': brightness_error,
+ }
+
+ return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable)
+
+ def loss(self, flow, predictions):
+ return self.net_s.loss(flow, predictions)
diff --git a/Codes/flownet2/src/flownet_css/test.py b/Codes/flownet2/src/flownet_css/test.py
new file mode 100644
index 0000000..9d1249e
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_css import FlowNetCSS
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetCSS(mode=Mode.TEST)
+
+    # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetCSS/flownet-CSS.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_css/train.py b/Codes/flownet2/src/flownet_css/train.py
new file mode 100644
index 0000000..2964f3e
--- /dev/null
+++ b/Codes/flownet2/src/flownet_css/train.py
@@ -0,0 +1,22 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_css import FlowNetCSS
+
+# Create a new network
+net = FlowNetCSS()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_css',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow,
+ # Load trained weights for CS part of network
+ checkpoints={
+ './checkpoints/FlowNetCS/flownet-CS.ckpt-0': ('FlowNetCSS/FlowNetCS', 'FlowNetCSS')}
+)
diff --git a/Codes/flownet2/src/flownet_s/__init__.py b/Codes/flownet2/src/flownet_s/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/__init__.py
diff --git a/Codes/flownet2/src/flownet_s/flownet_s.py b/Codes/flownet2/src/flownet_s/flownet_s.py
new file mode 100644
index 0000000..f6704b1
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/flownet_s.py
@@ -0,0 +1,161 @@
+from ..net import Net, Mode
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+from ..downsample import downsample
+import math
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNetS(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ super(FlowNetS, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ stacked = False
+ with tf.variable_scope('FlowNetS'):
+ if 'warped' in inputs and 'flow' in inputs and 'brightness_error' in inputs:
+ stacked = True
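+                # Stacked configuration (FlowNetS refining an upstream estimate):
+                # image A (3) + image B (3) + warped B (3) + upstream flow (2)
+                # + brightness error (1) = 12 input channels.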
+ concat_inputs = tf.concat([inputs['input_a'],
+ inputs['input_b'],
+ inputs['warped'],
+ inputs['flow'],
+ inputs['brightness_error']], axis=3)
+ else:
+ concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3)
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ with slim.arg_scope([slim.conv2d], stride=2):
+ conv_1 = slim.conv2d(pad(concat_inputs, 3), 64, 7, scope='conv1')
+ conv_2 = slim.conv2d(pad(conv_1, 2), 128, 5, scope='conv2')
+ conv_3 = slim.conv2d(pad(conv_2, 2), 256, 5, scope='conv3')
+
+ conv3_1 = slim.conv2d(pad(conv_3), 256, 3, scope='conv3_1')
+ with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
+ conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
+ conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
+ conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
+ conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
+ conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
+ conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
+
+ """ START: Refinement Network """
+ with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
+ predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
+ scope='predict_flow6',
+ activation_fn=None)
+ deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
+ stride=2,
+ scope='deconv5'))
+ upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
+ stride=2,
+ scope='upsample_flow6to5',
+ activation_fn=None))
+ concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
+
+ predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
+ scope='predict_flow5',
+ activation_fn=None)
+ deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
+ stride=2,
+ scope='deconv4'))
+ upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
+ stride=2,
+ scope='upsample_flow5to4',
+ activation_fn=None))
+ concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
+
+ predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
+ scope='predict_flow4',
+ activation_fn=None)
+ deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
+ stride=2,
+ scope='deconv3'))
+ upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
+ stride=2,
+ scope='upsample_flow4to3',
+ activation_fn=None))
+ concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
+
+ predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
+ scope='predict_flow3',
+ activation_fn=None)
+ deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
+ stride=2,
+ scope='deconv2'))
+ upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
+ stride=2,
+ scope='upsample_flow3to2',
+ activation_fn=None))
+ concat2 = tf.concat([conv_2, deconv2, upsample_flow3to2], axis=3)
+
+ predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ """ END: Refinement Network """
+
+ flow = predict_flow2 * 20.0
+ # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
+ flow = tf.image.resize_bilinear(flow,
+ tf.stack([height, width]),
+ align_corners=True)
+
+ return {
+ 'predict_flow6': predict_flow6,
+ 'predict_flow5': predict_flow5,
+ 'predict_flow4': predict_flow4,
+ 'predict_flow3': predict_flow3,
+ 'predict_flow2': predict_flow2,
+ 'flow': flow,
+ }
+
+ def loss(self, flow, predictions):
+ flow = flow * 0.05
+
+ losses = []
+ INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
+
+ # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
+ predict_flow6 = predictions['predict_flow6']
+ size = [predict_flow6.shape[1], predict_flow6.shape[2]]
+ downsampled_flow6 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
+
+ # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
+ predict_flow5 = predictions['predict_flow5']
+ size = [predict_flow5.shape[1], predict_flow5.shape[2]]
+ downsampled_flow5 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
+
+ # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
+ predict_flow4 = predictions['predict_flow4']
+ size = [predict_flow4.shape[1], predict_flow4.shape[2]]
+ downsampled_flow4 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
+
+ # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
+ predict_flow3 = predictions['predict_flow3']
+ size = [predict_flow3.shape[1], predict_flow3.shape[2]]
+ downsampled_flow3 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
+
+ # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
+ predict_flow2 = predictions['predict_flow2']
+ size = [predict_flow2.shape[1], predict_flow2.shape[2]]
+ downsampled_flow2 = downsample(flow, size)
+ losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
+
+ loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
+
+ # Return the 'total' loss: loss fns + regularization terms defined in the model
+ return tf.losses.get_total_loss()
diff --git a/Codes/flownet2/src/flownet_s/test.py b/Codes/flownet2/src/flownet_s/test.py
new file mode 100644
index 0000000..ae1b2f3
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_s import FlowNetS
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetS(mode=Mode.TEST)
+
+    # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetS/flownet-S.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_s/train.py b/Codes/flownet2/src/flownet_s/train.py
new file mode 100644
index 0000000..13a792a
--- /dev/null
+++ b/Codes/flownet2/src/flownet_s/train.py
@@ -0,0 +1,19 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_s import FlowNetS
+
+# Create a new network
+net = FlowNetS()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_s_sample',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow
+)
diff --git a/Codes/flownet2/src/flownet_sd/__init__.py b/Codes/flownet2/src/flownet_sd/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/__init__.py
diff --git a/Codes/flownet2/src/flownet_sd/flownet_sd.py b/Codes/flownet2/src/flownet_sd/flownet_sd.py
new file mode 100644
index 0000000..2f5c9e4
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/flownet_sd.py
@@ -0,0 +1,160 @@
+from ..net import Net, Mode
+from ..utils import LeakyReLU, average_endpoint_error, pad, antipad
+# from ..downsample import downsample
+import math
+import tensorflow as tf
+slim = tf.contrib.slim
+
+
+class FlowNetSD(Net):
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ super(FlowNetSD, self).__init__(mode=mode, debug=debug)
+
+ def model(self, inputs, training_schedule, trainable=True, reuse=None):
+ _, height, width, _ = inputs['input_a'].shape.as_list()
+ with tf.variable_scope('FlowNetSD', reuse=reuse):
+ concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3)
+ with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
+ # Only backprop this network if trainable
+ trainable=trainable,
+ # He (aka MSRA) weight initialization
+ weights_initializer=slim.variance_scaling_initializer(),
+ activation_fn=LeakyReLU,
+ # We will do our own padding to match the original Caffe code
+ padding='VALID'):
+
+ weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay'])
+ with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer):
+ conv0 = slim.conv2d(pad(concat_inputs), 64, 3, scope='conv0')
+ conv1 = slim.conv2d(pad(conv0), 64, 3, stride=2, scope='conv1')
+ conv1_1 = slim.conv2d(pad(conv1), 128, 3, scope='conv1_1')
+ conv2 = slim.conv2d(pad(conv1_1), 128, 3, stride=2, scope='conv2')
+ conv2_1 = slim.conv2d(pad(conv2), 128, 3, scope='conv2_1')
+ conv3 = slim.conv2d(pad(conv2_1), 256, 3, stride=2, scope='conv3')
+ conv3_1 = slim.conv2d(pad(conv3), 256, 3, scope='conv3_1')
+ conv4 = slim.conv2d(pad(conv3_1), 512, 3, stride=2, scope='conv4')
+ conv4_1 = slim.conv2d(pad(conv4), 512, 3, scope='conv4_1')
+ conv5 = slim.conv2d(pad(conv4_1), 512, 3, stride=2, scope='conv5')
+ conv5_1 = slim.conv2d(pad(conv5), 512, 3, scope='conv5_1')
+ conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6')
+ conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')
+
+ """ START: Refinement Network """
+ with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
+ predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
+ scope='predict_flow6',
+ activation_fn=None)
+ deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4,
+ stride=2,
+ scope='deconv5'))
+ upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4,
+ stride=2,
+ scope='upsample_flow6to5',
+ activation_fn=None))
+ concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3)
+ interconv5 = slim.conv2d(pad(concat5), 512, 3,
+ activation_fn=None, scope='interconv5')
+
+ predict_flow5 = slim.conv2d(pad(interconv5), 2, 3,
+ scope='predict_flow5',
+ activation_fn=None)
+ deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4,
+ stride=2,
+ scope='deconv4'))
+ upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4,
+ stride=2,
+ scope='upsample_flow5to4',
+ activation_fn=None))
+ concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3)
+ interconv4 = slim.conv2d(pad(concat4), 256, 3,
+ activation_fn=None, scope='interconv4')
+
+ predict_flow4 = slim.conv2d(pad(interconv4), 2, 3,
+ scope='predict_flow4',
+ activation_fn=None)
+ deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4,
+ stride=2,
+ scope='deconv3'))
+ upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4,
+ stride=2,
+ scope='upsample_flow4to3',
+ activation_fn=None))
+ concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3)
+ interconv3 = slim.conv2d(pad(concat3), 128, 3,
+ activation_fn=None, scope='interconv3')
+
+ predict_flow3 = slim.conv2d(pad(interconv3), 2, 3,
+ scope='predict_flow3',
+ activation_fn=None)
+ deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4,
+ stride=2,
+ scope='deconv2'))
+ upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4,
+ stride=2,
+ scope='upsample_flow3to2',
+ activation_fn=None))
+ concat2 = tf.concat([conv2, deconv2, upsample_flow3to2], axis=3)
+ interconv2 = slim.conv2d(pad(concat2), 64, 3,
+ activation_fn=None, scope='interconv2')
+
+ predict_flow2 = slim.conv2d(pad(interconv2), 2, 3,
+ scope='predict_flow2',
+ activation_fn=None)
+ """ END: Refinement Network """
+
+ flow = predict_flow2 * 0.05
+ # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different
+ flow = tf.image.resize_bilinear(flow,
+ tf.stack([height, width]),
+ align_corners=True)
+
+ return {
+ 'predict_flow6': predict_flow6,
+ 'predict_flow5': predict_flow5,
+ 'predict_flow4': predict_flow4,
+ 'predict_flow3': predict_flow3,
+ 'predict_flow2': predict_flow2,
+ 'flow': flow,
+ }
+
+ # def loss(self, flow, predictions):
+ # flow = flow * 20.0
+ #
+ # losses = []
+ # INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)
+ #
+ # # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
+ # predict_flow6 = predictions['predict_flow6']
+ # size = [predict_flow6.shape[1], predict_flow6.shape[2]]
+ # downsampled_flow6 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))
+ #
+ # # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
+ # predict_flow5 = predictions['predict_flow5']
+ # size = [predict_flow5.shape[1], predict_flow5.shape[2]]
+ # downsampled_flow5 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))
+ #
+ # # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
+ # predict_flow4 = predictions['predict_flow4']
+ # size = [predict_flow4.shape[1], predict_flow4.shape[2]]
+ # downsampled_flow4 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))
+ #
+ # # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
+ # predict_flow3 = predictions['predict_flow3']
+ # size = [predict_flow3.shape[1], predict_flow3.shape[2]]
+ # downsampled_flow3 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))
+ #
+ # # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
+ # predict_flow2 = predictions['predict_flow2']
+ # size = [predict_flow2.shape[1], predict_flow2.shape[2]]
+ # downsampled_flow2 = downsample(flow, size)
+ # losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))
+ #
+ # loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])
+ #
+ # # Return the 'total' loss: loss fns + regularization terms defined in the model
+ # return tf.losses.get_total_loss()
diff --git a/Codes/flownet2/src/flownet_sd/test.py b/Codes/flownet2/src/flownet_sd/test.py
new file mode 100644
index 0000000..b2ac285
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/test.py
@@ -0,0 +1,51 @@
+import argparse
+import os
+from ..net import Mode
+from .flownet_sd import FlowNetSD
+
+FLAGS = None
+
+
+def main():
+ # Create a new network
+ net = FlowNetSD(mode=Mode.TEST)
+
+    # Run inference on the image pair
+ net.test(
+ checkpoint='./checkpoints/FlowNetSD/flownet-SD.ckpt-0',
+ input_a_path=FLAGS.input_a,
+ input_b_path=FLAGS.input_b,
+ out_path=FLAGS.out,
+ )
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--input_a',
+ type=str,
+ required=True,
+ help='Path to first image'
+ )
+ parser.add_argument(
+ '--input_b',
+ type=str,
+ required=True,
+ help='Path to second image'
+ )
+ parser.add_argument(
+ '--out',
+ type=str,
+ required=True,
+ help='Path to output flow result'
+ )
+ FLAGS = parser.parse_args()
+
+ # Verify arguments are valid
+ if not os.path.exists(FLAGS.input_a):
+ raise ValueError('image_a path must exist')
+ if not os.path.exists(FLAGS.input_b):
+ raise ValueError('image_b path must exist')
+ if not os.path.isdir(FLAGS.out):
+ raise ValueError('out directory must exist')
+ main()
diff --git a/Codes/flownet2/src/flownet_sd/train.py b/Codes/flownet2/src/flownet_sd/train.py
new file mode 100644
index 0000000..86c64e5
--- /dev/null
+++ b/Codes/flownet2/src/flownet_sd/train.py
@@ -0,0 +1,19 @@
+from ..dataloader import load_batch
+from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG
+from ..training_schedules import LONG_SCHEDULE
+from .flownet_sd import FlowNetSD
+
+# Create a new network
+net = FlowNetSD()
+
+# Load a batch of data
+input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step)
+
+# Train on the data
+net.train(
+ log_dir='./logs/flownet_sd_sample',
+ training_schedule=LONG_SCHEDULE,
+ input_a=input_a,
+ input_b=input_b,
+ flow=flow
+)
diff --git a/Codes/flownet2/src/net.py b/Codes/flownet2/src/net.py
new file mode 100644
index 0000000..43b2193
--- /dev/null
+++ b/Codes/flownet2/src/net.py
@@ -0,0 +1,177 @@
+import abc
+from enum import Enum
+import os
+import tensorflow as tf
+from .flowlib import flow_to_image, write_flow
+import numpy as np
+# from scipy.misc import imread, imsave, imresize
+import cv2
+import uuid
+from .training_schedules import LONG_SCHEDULE
+slim = tf.contrib.slim
+
+os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+class Mode(Enum):
+ TRAIN = 1
+ TEST = 2
+
+
+class Net(object):
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, mode=Mode.TRAIN, debug=False):
+ self.global_step = slim.get_or_create_global_step()
+ self.mode = mode
+ self.debug = debug
+
+ @abc.abstractmethod
+ def model(self, inputs, training_schedule, trainable=True):
+ """
+ Defines the model and returns a tuple of Tensors needed for calculating the loss.
+ """
+ return
+
+ @abc.abstractmethod
+ def loss(self, **kwargs):
+ """
+ Accepts prediction Tensors from the output of `model`.
+ Returns a single Tensor representing the total loss of the model.
+ """
+ return
+ """
+ python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_6.png \
+ --input_b /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_7.png \
+ --out ./
+ python -m src.flownet_sd.test --input_a 006.png --input_b 007.png --out ./
+ python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/ped1/frames/testing/01/006.jpg \
+ --input_b /home/liuwen/ssd/videogan/ped1/frames/testing/01/007.jpg \
+ --out ./
+ """
+ def test(self, checkpoint, input_a_path, input_b_path, out_path, save_image=True, save_flo=False):
+ input_a = cv2.imread(input_a_path)
+ input_b = cv2.imread(input_b_path)
+
+ input_a = cv2.resize(input_a, (512, 384))
+ input_b = cv2.resize(input_b, (512, 384))
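+        # 512x384 keeps both dimensions divisible by 64, matching the six
+        # stride-2 stages of the encoder.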
+ print(input_a.shape, input_b.shape)
+
+ # Convert from RGB -> BGR
+ # input_a = input_a[..., [2, 1, 0]]
+ # input_b = input_b[..., [2, 1, 0]]
+
+ # Scale from [0, 255] -> [0.0, 1.0] if needed
+ if input_a.max() > 1.0:
+ input_a = input_a / 255.0
+ if input_b.max() > 1.0:
+ input_b = input_b / 255.0
+
+ # TODO: This is a hack, we should get rid of this
+ training_schedule = LONG_SCHEDULE
+
+ inputs = {
+ 'input_a': tf.expand_dims(tf.constant(input_a, dtype=tf.float32), 0),
+ 'input_b': tf.expand_dims(tf.constant(input_b, dtype=tf.float32), 0),
+ }
+ predictions = self.model(inputs, training_schedule)
+ pred_flow = predictions['flow']
+
+ saver = tf.train.Saver()
+
+ config = tf.ConfigProto()
+ config.gpu_options.allow_growth = True
+ with tf.Session(config=config) as sess:
+ saver.restore(sess, checkpoint)
+ pred_flow = sess.run(pred_flow)[0, :, :, :]
+
+ np.save('temporal_ped1', pred_flow)
+
+ unique_name = 'flow-' + str(uuid.uuid4())
+ if save_image:
+ flow_img = flow_to_image(pred_flow)
+ full_out_path = os.path.join(out_path, unique_name + '.png')
+ cv2.imwrite(full_out_path, flow_img)
+
+ if save_flo:
+ full_out_path = os.path.join(out_path, unique_name + '.flo')
+ write_flow(pred_flow, full_out_path)
+
+ def train(self, log_dir, training_schedule, input_a, input_b, flow, checkpoints=None):
+ tf.summary.image("image_a", input_a, max_outputs=2)
+ tf.summary.image("image_b", input_b, max_outputs=2)
+
+ self.learning_rate = tf.train.piecewise_constant(
+ self.global_step,
+ [tf.cast(v, tf.int64) for v in training_schedule['step_values']],
+ training_schedule['learning_rates'])
+
+ optimizer = tf.train.AdamOptimizer(
+ self.learning_rate,
+ training_schedule['momentum'],
+ training_schedule['momentum2'])
+
+ inputs = {
+ 'input_a': input_a,
+ 'input_b': input_b,
+ }
+ predictions = self.model(inputs, training_schedule)
+ total_loss = self.loss(flow, predictions)
+ tf.summary.scalar('loss', total_loss)
+
+ if checkpoints:
+            for (checkpoint_path, (scope, new_scope)) in checkpoints.items():
+ variables_to_restore = slim.get_variables(scope=scope)
+ renamed_variables = {
+ var.op.name.split(new_scope + '/')[1]: var
+ for var in variables_to_restore
+ }
+ restorer = tf.train.Saver(renamed_variables)
+ with tf.Session() as sess:
+ restorer.restore(sess, checkpoint_path)
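+ # The `checkpoints` argument is expected to map a checkpoint path to a
+ # (scope, new_scope) pair: variables found under `scope` in the current graph
+ # have the `new_scope` prefix stripped so their names match the standalone
+ # checkpoint. Illustrative mapping (hypothetical paths):
+ #   checkpoints = {'./ckpt/flownet-CSS.ckpt-0': ('FlowNet2/FlowNetCSS', 'FlowNet2')}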
+
+ # Show the generated flow in TensorBoard
+ if 'flow' in predictions:
+ pred_flow_0 = predictions['flow'][0, :, :, :]
+ pred_flow_0 = tf.py_func(flow_to_image, [pred_flow_0], tf.uint8)
+ pred_flow_1 = predictions['flow'][1, :, :, :]
+ pred_flow_1 = tf.py_func(flow_to_image, [pred_flow_1], tf.uint8)
+ pred_flow_img = tf.stack([pred_flow_0, pred_flow_1], 0)
+ tf.summary.image('pred_flow', pred_flow_img, max_outputs=2)
+
+ true_flow_0 = flow[0, :, :, :]
+ true_flow_0 = tf.py_func(flow_to_image, [true_flow_0], tf.uint8)
+ true_flow_1 = flow[1, :, :, :]
+ true_flow_1 = tf.py_func(flow_to_image, [true_flow_1], tf.uint8)
+ true_flow_img = tf.stack([true_flow_0, true_flow_1], 0)
+ tf.summary.image('true_flow', true_flow_img, max_outputs=2)
+
+ train_op = slim.learning.create_train_op(
+ total_loss,
+ optimizer,
+ summarize_gradients=True)
+
+ if self.debug:
+ with tf.Session() as sess:
+ sess.run(tf.global_variables_initializer())
+ tf.train.start_queue_runners(sess)
+ slim.learning.train_step(
+ sess,
+ train_op,
+ self.global_step,
+ {
+ 'should_trace': tf.constant(1),
+ 'should_log': tf.constant(1),
+ 'logdir': log_dir + '/debug',
+ }
+ )
+ else:
+ slim.learning.train(
+ train_op,
+ log_dir,
+ # session_config=tf.ConfigProto(allow_soft_placement=True),
+ global_step=self.global_step,
+ save_summaries_secs=60,
+ number_of_steps=training_schedule['max_iter']
+ )
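+# A rough sketch of what a concrete subclass provides (illustrative only; the
+# real definitions live under src/flownet_*/, and the names below are assumptions):
+#
+#   class ToyNet(Net):
+#       def model(self, inputs, training_schedule, trainable=True):
+#           # build a graph from inputs['input_a'] and inputs['input_b'] ...
+#           return {'flow': predicted_flow}
+#
+#       def loss(self, flow, predictions):
+#           return tf.reduce_mean(tf.squared_difference(predictions['flow'], flow))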
diff --git a/Codes/flownet2/src/ops/build/.gitkeep b/Codes/flownet2/src/ops/build/.gitkeep
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Codes/flownet2/src/ops/build/.gitkeep
diff --git a/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc
new file mode 100644
index 0000000..4e92f45
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc
@@ -0,0 +1,160 @@
+#define EIGEN_USE_THREADS
+
+#include "correlation_kernel.h"
+#include "pad.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+template<typename Device>
+class CorrelationGradKernel : public OpKernel {
+ public:
+ explicit CorrelationGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the attributes
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad));
+
+ OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd"));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images and verify their dimensions
+ const Tensor& gradients_t = ctx->input(0);
+ const Tensor& input_a_t = ctx->input(1);
+ const Tensor& input_b_t = ctx->input(2);
+
+ OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4"));
+ OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4"));
+
+ // Get dimensions of input
+ const int batch_size = input_a_t.dim_size(0);
+ const int in_height = input_a_t.dim_size(1);
+ const int in_width = input_a_t.dim_size(2);
+ const int in_channels = input_a_t.dim_size(3);
+ const int in_count_per_sample = in_height * in_width * in_channels;
+ const int padded_height = in_height + 2 * pad;
+ const int padded_width = in_width + 2 * pad;
+
+ // The size of the unreachable border region on each side
+ const int kernel_radius = (kernel_size - 1) / 2;
+ const int border_size = max_displacement + kernel_radius;
+
+ // Calculate the output dimensions
+ const int out_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1);
+ const int out_width = ceil((float)(padded_width - border_size * 2) / (float)stride_1);
+
+ const int neighborhood_grid_radius = max_displacement / stride_2;
+ const int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
+ const int out_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ // Allocate the memory for the outputs
+ Tensor *output_a_gradient_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_a_t.shape(), &output_a_gradient_t));
+ Tensor *output_b_gradient_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(1, input_b_t.shape(), &output_b_gradient_t));
+
+ // Get the tensors
+ auto gradients = gradients_t.tensor<float, 4>();
+ auto input_a = input_a_t.tensor<float, 4>();
+ auto input_b = input_b_t.tensor<float, 4>();
+ auto output_a_gradient = output_a_gradient_t->tensor<float, 4>();
+ auto output_b_gradient = output_b_gradient_t->tensor<float, 4>();
+
+ // Create temporary tensors for padded inputs
+ Tensor padded_input_a_t, padded_input_b_t;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, in_channels }),
+ &padded_input_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, in_channels }),
+ &padded_input_b_t));
+ auto padded_input_a = padded_input_a_t.tensor<float, 4>();
+ auto padded_input_b = padded_input_b_t.tensor<float, 4>();
+
+ // Pad the inputs
+ Pad(ctx->eigen_device<Device>(),
+ input_a.data(),
+ batch_size,
+ in_height,
+ in_width,
+ in_channels,
+ padded_height,
+ padded_width,
+ padded_input_a.data());
+ Pad(ctx->eigen_device<Device>(),
+ input_b.data(),
+ batch_size,
+ in_height,
+ in_width,
+ in_channels,
+ padded_height,
+ padded_width,
+ padded_input_b.data());
+
+ CorrelationGradA(ctx->eigen_gpu_device(),
+ batch_size,
+ out_width,
+ out_height,
+ out_channels,
+ max_displacement,
+ neighborhood_grid_radius,
+ neighborhood_grid_width,
+ kernel_radius,
+ stride_1,
+ stride_2,
+ in_width,
+ in_height,
+ padded_width,
+ padded_height,
+ in_channels,
+ in_count_per_sample,
+ pad,
+ padded_input_b.data(),
+ gradients.data(),
+ output_a_gradient.data());
+
+ CorrelationGradB(ctx->eigen_gpu_device(),
+ batch_size,
+ out_width,
+ out_height,
+ out_channels,
+ max_displacement,
+ neighborhood_grid_radius,
+ neighborhood_grid_width,
+ kernel_radius,
+ stride_1,
+ stride_2,
+ in_width,
+ in_height,
+ padded_width,
+ padded_height,
+ in_channels,
+ in_count_per_sample,
+ pad,
+ padded_input_a.data(),
+ gradients.data(),
+ output_b_gradient.data());
+ }
+
+ private:
+ int kernel_size;
+ int max_displacement;
+ int stride_1;
+ int stride_2;
+ int pad;
+};
+
+REGISTER_KERNEL_BUILDER(Name("CorrelationGrad")
+ .Device(DEVICE_GPU),
+ CorrelationGradKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc
new file mode 100644
index 0000000..19e3a40
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc
@@ -0,0 +1,262 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#define ROUND_OFF 50000
+
+#include <stdio.h>
+#include <iostream>
+
+#include "correlation_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void CorrelateDataBackward0(const int nthreads,
+ int item,
+ int out_width,
+ int out_height,
+ int out_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int stride_1,
+ int stride_2,
+ int in_width,
+ int in_height,
+ int padded_in_width,
+ int padded_in_height,
+ int in_channels,
+ int in_count_per_sample,
+ int pad_size,
+ float *output_a_gradient,
+ const float *input_b,
+ const float *gradient)
+{
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int k = index % in_channels; // channels
+ int x = (index / in_channels) % in_width + pad_size; // w-pos
+ int y = (index / in_channels / in_width) % in_height + pad_size; // h-pos
+
+ // Get X,Y ranges and clamp
+ // round_off is a trick to enable integer division with ceil, even for
+ // negative numbers
+ // We use a large offset, for the inner part not to become negative.
+ const int round_off = ROUND_OFF;
+ const int round_off_s1 = stride_1 * round_off;
+
+ // We add round_off before the int division and subtract round_off after
+ // it, to ensure the formula matches ceil behavior:
+ int xmin = (x - 2 * kernel_radius - max_displacement + round_off_s1 - 1) / stride_1 + 1 -
+ round_off;
+ int ymin = (y - 2 * kernel_radius - max_displacement + round_off_s1 - 1) / stride_1 + 1 -
+ round_off;
+
+ // Same here:
+ int xmax = (x - max_displacement + round_off_s1) / stride_1 - round_off;
+ int ymax = (y - max_displacement + round_off_s1) / stride_1 - round_off;
+
+ float sum = 0;
+
+ if ((xmax >= 0) && (ymax >= 0) && (xmin <= out_width - 1) && (ymin <= out_height - 1)) {
+ xmin = max(0, xmin);
+ xmax = min(out_width - 1, xmax);
+
+ ymin = max(0, ymin);
+ ymax = min(out_height - 1, ymax);
+
+ for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+ for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+ // Get input_b data:
+ int s2o = stride_2 * o;
+ int s2p = stride_2 * p;
+ int idx_input_b = ((item * padded_in_height + (y + s2p)) * padded_in_width + (x + s2o)) *
+ in_channels + k;
+ float input_b_tmp = input_b[idx_input_b]; // input_b[x+s2o,y+s2p,k]
+
+ // Index offset for gradient in following loops:
+ int op = (p + neighborhood_grid_radius) * neighborhood_grid_width +
+ (o + neighborhood_grid_radius); // index [o,p]
+
+ for (int y = ymin; y <= ymax; y++) {
+ for (int x = xmin; x <= xmax; x++) {
+ // gradient[x,y,o,p]
+ int idx_gradient = ((item * out_height + y) * out_width + x) * out_channels + op;
+ sum += gradient[idx_gradient] * input_b_tmp;
+ }
+ }
+ }
+ }
+ }
+ const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * in_channels;
+ const int input_a_idx = ((y - pad_size) * in_width + (x - pad_size)) * in_channels + k;
+ output_a_gradient[input_a_idx + item * in_count_per_sample] = sum / (float)sumelems;
+ }
+}
+
+__global__ void CorrelateDataBackward1(const int nthreads,
+ int item,
+ int out_width,
+ int out_height,
+ int out_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int stride_1,
+ int stride_2,
+ int in_width,
+ int in_height,
+ int padded_in_width,
+ int padded_in_height,
+ int in_channels,
+ int in_count_per_sample,
+ int pad_size,
+ float *output_b_gradient,
+ const float *input_a,
+ const float *gradient)
+{
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int k = index % in_channels; // channels
+ int x = (index / in_channels) % in_width + pad_size; // w-pos
+ int y = (index / in_channels / in_width) % in_height + pad_size; // h-pos
+
+ // round_off is a trick to enable integer division with ceil, even for
+ // negative numbers
+ // We use a large offset, for the inner part not to become negative.
+ const int round_off = ROUND_OFF;
+ const int round_off_s1 = stride_1 * round_off;
+
+ float sum = 0;
+
+ // Height (y)
+ for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) {
+ // Width (x)
+ for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) {
+ int s2o = stride_2 * o;
+ int s2p = stride_2 * p;
+
+ // Get X,Y ranges and clamp
+ // We add round_off before the int division and subtract round_off
+ // after it, to ensure the formula matches ceil behavior:
+ int xmin = (x - 2 * kernel_radius - max_displacement - s2o + round_off_s1 - 1) / stride_1 +
+ 1 - round_off;
+ int ymin = (y - 2 * kernel_radius - max_displacement - s2p + round_off_s1 - 1) / stride_1 +
+ 1 - round_off;
+
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+
+ // Same here:
+ int xmax = (x - max_displacement - s2o + round_off_s1) / stride_1 - round_off;
+ int ymax = (y - max_displacement - s2p + round_off_s1) / stride_1 - round_off;
+
+ if ((xmax >= 0) && (ymax >= 0) && (xmin <= out_width - 1) && (ymin <= out_height - 1)) {
+ xmin = max(0, xmin);
+ xmax = min(out_width - 1, xmax);
+
+ ymin = max(0, ymin);
+ ymax = min(out_height - 1, ymax);
+
+ // Get input_a data:
+ int idx_input_a = ((item * padded_in_height + (y - s2p)) * padded_in_width + (x - s2o)) *
+ in_channels + k;
+ float input_a_tmp = input_a[idx_input_a];
+
+ // Index offset for gradient in following loops:
+ int op = (p + neighborhood_grid_radius) * neighborhood_grid_width +
+ (o + neighborhood_grid_radius); // index [o,p]
+
+ for (int y = ymin; y <= ymax; y++) {
+ for (int x = xmin; x <= xmax; x++) {
+ int idx_gradient = ((item * out_height + y) * out_width + x) * out_channels + op;
+ sum += gradient[idx_gradient] * input_a_tmp;
+ }
+ }
+ }
+ }
+ }
+ const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * in_channels;
+ const int input_b_idx = ((y - pad_size) * in_width + (x - pad_size)) * in_channels + k;
+ output_b_gradient[input_b_idx + item * in_count_per_sample] = sum / (float)sumelems;
+ }
+}
+
+void CorrelationGradA(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample, // h * w * ch
+ const int pad,
+ const float *input_b,
+ const float *gradient,
+ float *output_a_gradient) {
+ CudaLaunchConfig config = GetCudaLaunchConfig(in_count_per_sample, device);
+
+ for (int n = 0; n < batch_size; n++) {
+ CorrelateDataBackward0 << < config.block_count, config.thread_per_block, 0,
+ device.stream() >> > (
+ in_count_per_sample,
+ n, out_width, out_height, out_channels,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
+ stride_1, stride_2,
+ in_width, in_height, padded_in_width, padded_in_height, in_channels, in_count_per_sample, pad,
+ output_a_gradient, input_b, gradient);
+ }
+}
+
+void CorrelationGradB(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample,
+ const int pad,
+ const float *input_a,
+ const float *gradient,
+ float *output_b_gradient) {
+ CudaLaunchConfig config = GetCudaLaunchConfig(in_count_per_sample, device);
+
+ for (int n = 0; n < batch_size; n++) {
+ CorrelateDataBackward1 << < config.block_count, config.thread_per_block, 0,
+ device.stream() >> > (
+ in_count_per_sample,
+ n, out_width, out_height, out_channels,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
+ stride_1, stride_2,
+ in_width, in_height, padded_in_width, padded_in_height, in_channels, in_count_per_sample, pad,
+ output_b_gradient, input_a, gradient);
+ }
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.cc b/Codes/flownet2/src/ops/correlation/correlation_kernel.cc
new file mode 100644
index 0000000..f8a5193
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.cc
@@ -0,0 +1,137 @@
+#define EIGEN_USE_THREADS
+
+#include <utility>
+
+#include "correlation_kernel.h"
+#include "pad.h"
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+template<typename Device>
+class CorrelationKernel : public OpKernel {
+ public:
+ explicit CorrelationKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the attributes
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad));
+
+ OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd"));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images and verify their dimensions
+ const Tensor& input_a_t = ctx->input(0);
+ const Tensor& input_b_t = ctx->input(1);
+
+ OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4"));
+ OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4"));
+
+ // Get dimensions of the (not yet padded) input
+ int batch_size = input_a_t.dim_size(0);
+ int input_height = input_a_t.dim_size(1);
+ int input_width = input_a_t.dim_size(2);
+ int input_channels = input_a_t.dim_size(3);
+ int padded_height = input_height + 2 * pad;
+ int padded_width = input_width + 2 * pad;
+
+ // The size of the unreachable border region on each side
+ int kernel_radius = (kernel_size - 1) / 2;
+ int border_size = max_displacement + kernel_radius;
+
+ // Calculate the output dimensions
+ int output_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1);
+ int output_width = ceil((float)(padded_width - border_size * 2) / (float)stride_1);
+
+ OP_REQUIRES(ctx, output_height >= 1,
+ errors::InvalidArgument("Neighborhood and kernel don't fit in input height."));
+ OP_REQUIRES(ctx, output_width >= 1,
+ errors::InvalidArgument("Neighborhood and kernel don't fit in input width."));
+
+ int neighborhood_grid_radius = max_displacement / stride_2;
+ int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
+ int output_channels = neighborhood_grid_width * neighborhood_grid_width;
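+ // For example, with FlowNet-C style settings (illustrative values)
+ // max_displacement = 20 and stride_2 = 2, neighborhood_grid_radius = 10,
+ // neighborhood_grid_width = 21 and output_channels = 21 * 21 = 441.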
+
+ // Allocate the memory for the output
+ Tensor *output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(
+ 0,
+ TensorShape({ batch_size, output_height, output_width, output_channels }),
+ &output_t));
+
+ // Get the tensors
+ auto input_a = input_a_t.tensor<float, 4>();
+ auto input_b = input_b_t.tensor<float, 4>();
+ auto output = output_t->tensor<float, 4>();
+
+ // Create temporary tensors for padded inputs
+ Tensor padded_input_a_t, padded_input_b_t;
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, input_channels }),
+ &padded_input_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, padded_height, padded_width, input_channels }),
+ &padded_input_b_t));
+ auto padded_input_a = padded_input_a_t.tensor<float, 4>();
+ auto padded_input_b = padded_input_b_t.tensor<float, 4>();
+
+ // Pad the inputs
+ Pad(ctx->eigen_device<Device>(),
+ input_a.data(),
+ batch_size,
+ input_height,
+ input_width,
+ input_channels,
+ padded_height,
+ padded_width,
+ padded_input_a.data());
+ Pad(ctx->eigen_device<Device>(),
+ input_b.data(),
+ batch_size,
+ input_height,
+ input_width,
+ input_channels,
+ padded_height,
+ padded_width,
+ padded_input_b.data());
+
+ // Perform cross correlation
+ Correlation(ctx->eigen_device<Device>(),
+ padded_input_a.data(),
+ padded_input_b.data(),
+ batch_size,
+ output_height,
+ output_width,
+ output_channels,
+ output_height * output_width * output_channels,
+ padded_height,
+ padded_width,
+ input_channels,
+ max_displacement,
+ neighborhood_grid_radius,
+ neighborhood_grid_width,
+ kernel_radius,
+ kernel_size,
+ stride_1,
+ stride_2,
+ output.data());
+ }
+
+ private:
+ int kernel_size;
+ int max_displacement;
+ int stride_1;
+ int stride_2;
+ int pad;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Correlation")
+ .Device(DEVICE_GPU),
+ CorrelationKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc b/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc
new file mode 100644
index 0000000..c63e489
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc
@@ -0,0 +1,153 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#define WARPS_PER_BLOCK 1
+#define THREADS_PER_WARP 32
+
+#include <stdio.h>
+#include <iostream>
+
+#include "correlation_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void CorrelateData(int batch_size,
+ int out_width,
+ int out_height,
+ int out_channels,
+ int out_count,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int kernel_size,
+ int stride_1,
+ int stride_2,
+ int in_width_padded,
+ int in_height_padded,
+ int in_channels,
+ const float *input_a,
+ const float *input_b,
+ float *output) {
+ extern __shared__ char patch_data_char[];
+
+ float *patch_data = (float *)patch_data_char;
+
+ // First (upper left) position of kernel upper-left corner in current center
+ // position of neighborhood in image 1
+ int x1 = blockIdx.x * stride_1 + max_displacement;
+ int y1 = blockIdx.y * stride_1 + max_displacement;
+ int item = blockIdx.z;
+ int ch_off = threadIdx.x;
+
+ // Load 3D patch into shared memory
+ // HEIGHT
+ for (int j = 0; j < kernel_size; j++) {
+ // WIDTH
+ for (int i = 0; i < kernel_size; i++) {
+ int ji_off = ((j * kernel_size) + i) * in_channels;
+
+ // CHANNELS
+ for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) {
+ int idx1 = ((item * in_height_padded + y1 + j) * in_width_padded + x1 + i) *
+ in_channels + ch;
+ int idxPatchData = ji_off + ch;
+ patch_data[idxPatchData] = input_a[idx1];
+ }
+ }
+ }
+
+ __syncthreads();
+
+ __shared__ float sum[WARPS_PER_BLOCK * THREADS_PER_WARP];
+
+ // Compute correlation
+ for (int out_channel = 0; out_channel < out_channels; out_channel++) {
+ sum[ch_off] = 0;
+
+ int s2o = (out_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
+ int s2p = (out_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride_2;
+ int x2 = x1 + s2o;
+ int y2 = y1 + s2p;
+
+ // HEIGHT
+ for (int j = 0; j < kernel_size; j++) {
+ // WIDTH
+ for (int i = 0; i < kernel_size; i++) {
+ int ji_off = ((j * kernel_size) + i) * in_channels;
+
+ // CHANNELS
+ for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) {
+ int idxPatchData = ji_off + ch;
+ int idx2 = ((item * in_height_padded + y2 + j) * in_width_padded + x2 + i) *
+ in_channels + ch;
+
+ sum[ch_off] += patch_data[idxPatchData] * input_b[idx2];
+ }
+ }
+ }
+
+ __syncthreads();
+
+ if (ch_off == 0) {
+ float total_sum = 0;
+
+ for (int idx = 0; idx < WARPS_PER_BLOCK * THREADS_PER_WARP; idx++) {
+ total_sum += sum[idx];
+ }
+ const int sumelems = kernel_size * kernel_size * in_channels;
+ const int index = (blockIdx.y * out_width + blockIdx.x) * out_channels + out_channel;
+
+ /* from Caffe: const int index = ((out_channel * out_height +
+ blockIdx.y) * out_width) + blockIdx.x; */
+ output[index + item * out_count] = total_sum / (float)sumelems;
+
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ // n = 0
+ // caffe: ((k * H + h) * W + w) + n * K * H * W
+ // tf: (h * W + w) * K + k + n * H * W * K
+ }
+ }
+}
+
+void Correlation(const GPUDevice& device,
+ const float *input_a,
+ const float *input_b,
+ const int batch_size,
+ const int out_height,
+ const int out_width,
+ const int out_channels,
+ const int out_count,
+ const int in_height_padded,
+ const int in_width_padded,
+ const int in_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int kernel_size,
+ int stride_1,
+ int stride_2,
+ float *output) {
+ dim3 totalBlocksCorr(out_width, out_height, batch_size);
+ dim3 threadsPerBlock(THREADS_PER_WARP * WARPS_PER_BLOCK);
+ const int shared_memory_per_block = (kernel_size * kernel_size) * in_channels;
+
+ CorrelateData << < totalBlocksCorr, threadsPerBlock, shared_memory_per_block * sizeof(float),
+ device.stream() >> > (
+ batch_size, out_width, out_height, out_channels, out_count,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius,
+ kernel_size, stride_1, stride_2, in_width_padded, in_height_padded, in_channels,
+ input_a, input_b, output);
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.h b/Codes/flownet2/src/ops/correlation/correlation_kernel.h
new file mode 100644
index 0000000..a1dfb62
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.h
@@ -0,0 +1,77 @@
+#ifndef FLOWNET_CORRELATION_H_
+#define FLOWNET_CORRELATION_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+void Correlation(const GPUDevice& device,
+ const float *input_a,
+ const float *input_b,
+ const int batch_size,
+ const int out_height,
+ const int out_width,
+ const int out_channels,
+ const int out_count,
+ const int in_height_padded,
+ const int in_width_padded,
+ const int in_channels,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_radius,
+ int kernel_size,
+ int stride_1,
+ int stride_2,
+ float *output);
+
+
+void CorrelationGradA(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample,
+ const int pad,
+ const float *input_b,
+ const float *gradient,
+ float *output_a_gradient);
+
+void CorrelationGradB(const GPUDevice& device,
+ const int batch_size,
+ const int out_width,
+ const int out_height,
+ const int out_channels,
+ const int max_displacement,
+ const int neighborhood_grid_radius,
+ const int neighborhood_grid_width,
+ const int kernel_radius,
+ const int stride_1,
+ const int stride_2,
+ const int in_width,
+ const int in_height,
+ const int padded_in_width,
+ const int padded_in_height,
+ const int in_channels,
+ const int in_count_per_sample,
+ const int pad,
+ const float *input_a,
+ const float *gradient,
+ float *output_b_gradient);
+} // end namespace tensorflow
+
+#endif // FLOWNET_CORRELATION_H_
diff --git a/Codes/flownet2/src/ops/correlation/correlation_op.cc b/Codes/flownet2/src/ops/correlation/correlation_op.cc
new file mode 100644
index 0000000..4f420f0
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/correlation_op.cc
@@ -0,0 +1,83 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+Status SetOutput(InferenceContext *c) {
+ ShapeHandle input_a, input_b, input;
+
+ // Get shapes of both inputs and verify they are rank 4
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_a));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_b));
+
+ // Verify inputs are same dimensions
+ TF_RETURN_IF_ERROR(c->Merge(input_a, input_b, &input));
+
+ // Get the attributes
+ int kernel_size, max_displacement, stride_1, stride_2, pad;
+ TF_RETURN_IF_ERROR(c->GetAttr("kernel_size", &kernel_size));
+ TF_RETURN_IF_ERROR(c->GetAttr("max_displacement", &max_displacement));
+ TF_RETURN_IF_ERROR(c->GetAttr("stride_1", &stride_1));
+ TF_RETURN_IF_ERROR(c->GetAttr("stride_2", &stride_2));
+ TF_RETURN_IF_ERROR(c->GetAttr("pad", &pad));
+
+ // Get dimensions of the (not yet padded) input
+ int64 batch = c->Value(c->Dim(input, 0));
+ int64 input_height = c->Value(c->Dim(input, 1));
+ int64 input_width = c->Value(c->Dim(input, 2));
+ int64 padded_height = input_height + 2 * pad;
+ int64 padded_width = input_width + 2 * pad;
+
+ // The size of the unreachable border region on each side
+ int kernel_radius = (kernel_size - 1) / 2;
+ int border_size = max_displacement + kernel_radius;
+
+ // Calculate the output dimensions
+ int64 output_height = (int64)ceil((float)(padded_height - border_size * 2) / (float)stride_1);
+ int64 output_width = (int64)ceil((float)(padded_width - border_size * 2) / (float)stride_1);
+
+ // TODO: Verify output size >= 1
+
+ int neighborhood_grid_radius = max_displacement / stride_2;
+ int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1;
+ int64 output_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ // Set output shape
+ c->set_output(0, c->MakeShape({ batch, output_height, output_width, output_channels }));
+ return Status::OK();
+}
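+// Illustrative shape example (assumed attribute values): for a 384x512 input
+// with pad = 20, max_displacement = 20, kernel_size = 1, stride_1 = 1 and
+// stride_2 = 2, the padded size is 424x552 and border_size = 20, so the
+// output is 384x512 spatially with 21 * 21 = 441 channels.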
+
+REGISTER_OP("Correlation")
+.Input("input_a: float32")
+.Input("input_b: float32")
+.Attr("kernel_size: int")
+.Attr("max_displacement: int")
+.Attr("stride_1: int")
+.Attr("stride_2: int")
+.Attr("pad: int")
+.Output("output: float32")
+.SetShapeFn(SetOutput);
+
+REGISTER_OP("CorrelationGrad")
+.Input("gradients: float32")
+.Input("input_a: float32")
+.Input("input_b: float32")
+.Attr("kernel_size: int")
+.Attr("max_displacement: int")
+.Attr("stride_1: int")
+.Attr("stride_2: int")
+.Attr("pad: int")
+.Output("backprops_a: float32")
+.Output("backprops_b: float32")
+.SetShapeFn([](InferenceContext *c) {
+ // Output gradients should be the same dimensions as the inputs
+ ShapeHandle out;
+ TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->input(2), &out));
+ c->set_output(0, out);
+ c->set_output(1, out);
+ return Status::OK();
+ });
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/ops/correlation/pad.cu.cc b/Codes/flownet2/src/ops/correlation/pad.cu.cc
new file mode 100644
index 0000000..0b6c93d
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/pad.cu.cc
@@ -0,0 +1,76 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "pad.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void PadData(
+ const float *in,
+ int in_widthheight,
+ int in_width,
+ int in_height,
+ int out_width,
+ int out_height,
+ int channels,
+ int padding,
+ float *out) {
+ int xy = blockIdx.x * blockDim.x + threadIdx.x;
+
+ int x = xy % in_width;
+ int y = xy / in_width;
+ int ch = blockIdx.y;
+ int n = blockIdx.z;
+
+ if (xy >= in_widthheight) {
+ out[((n * out_height + y) * out_width + x) * channels + ch] = 0.0;
+ return;
+ }
+
+ float value = in[((n * in_height + y) * in_width + x) * channels + ch];
+
+ __syncthreads();
+
+ int xpad = x + padding;
+ int ypad = y + padding;
+
+ out[((n * out_height + ypad) * out_width + xpad) * channels + ch] = value;
+}
+
+void Pad(const GPUDevice& device,
+ const float *input,
+ int batch_size,
+ int input_height,
+ int input_width,
+ int input_channels,
+ int output_height,
+ int output_width,
+ float *output) {
+ int in_widthheight = input_width * input_height;
+ int threads_per_block = 16;
+ dim3 totalBlocks((in_widthheight - 1) / threads_per_block + 1, input_channels, batch_size);
+
+ cudaMemset(output, 0, batch_size * output_height * output_width * input_channels * sizeof(float));
+
+ int padding = (output_height - input_height) / 2;
+
+ // LAUNCH KERNEL
+ PadData << < totalBlocks, threads_per_block, 0, device.stream() >> > (
+ input,
+ in_widthheight,
+ input_width,
+ input_height,
+ output_width,
+ output_height,
+ input_channels,
+ padding,
+ output);
+}
+}
+#endif // if GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/correlation/pad.h b/Codes/flownet2/src/ops/correlation/pad.h
new file mode 100644
index 0000000..afb4df0
--- /dev/null
+++ b/Codes/flownet2/src/ops/correlation/pad.h
@@ -0,0 +1,20 @@
+#ifndef FLOWNET_PAD_H_
+#define FLOWNET_PAD_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+void Pad(const GPUDevice& device,
+ const float *input,
+ int batch_size,
+ int input_height,
+ int input_width,
+ int input_channels,
+ int output_height,
+ int output_width,
+ float *output);
+} // end namespace tensorflow
+
+#endif // ifndef FLOWNET_PAD_H_
diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel.cc b/Codes/flownet2/src/ops/downsample/downsample_kernel.cc
new file mode 100644
index 0000000..eefe247
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_kernel.cc
@@ -0,0 +1,47 @@
+#define EIGEN_USE_THREADS
+
+#include "downsample_kernel.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename Device>
+class DownsampleKernel : public OpKernel {
+ public:
+ explicit DownsampleKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {
+ // Get the size [height, width] tensor and verify its dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("size", &size_));
+ OP_REQUIRES(ctx, size_.size() == 2, errors::InvalidArgument("size must be 2 dimensions"));
+ }
+
+ void Compute(OpKernelContext* ctx) override {
+ // Get the input image and verify its dimensions
+ const Tensor& input_t = ctx->input(0);
+ OP_REQUIRES(ctx, input_t.dims() == 4,
+ errors::InvalidArgument("Input images must have rank 4"));
+
+ // Allocate the memory for the output
+ Tensor* output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(
+ 0, TensorShape({input_t.dim_size(0), size_[0], size_[1], input_t.dim_size(3)}), &output_t));
+
+ // Perform the downsampling
+ auto input = input_t.tensor<float, 4>();
+ auto output = output_t->tensor<float, 4>();
+
+ Downsample(ctx->eigen_gpu_device(), input, output);
+ }
+
+ private:
+ std::vector<int32> size_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("Downsample")
+ .Device(DEVICE_GPU),
+ DownsampleKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel.h b/Codes/flownet2/src/ops/downsample/downsample_kernel.h
new file mode 100644
index 0000000..bcc4e3f
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_kernel.h
@@ -0,0 +1,18 @@
+#ifndef FLOWNET_DOWNSAMPLE_H_
+#define FLOWNET_DOWNSAMPLE_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+bool Downsample(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::Tensor output);
+
+} // end namespace tensorflow
+
+#endif // FLOWNET_DOWNSAMPLE_H_
diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc b/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc
new file mode 100644
index 0000000..b7629a0
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc
@@ -0,0 +1,108 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "downsample_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+#define CUDART_NAN_F __int_as_float(0x7fffffff)
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void DownsampleKernel(
+ const int32 nthreads,
+ const float* input_ptr,
+ float* output_ptr,
+ const int in_width,
+ const int in_height,
+ const int out_width,
+ const int out_height,
+ const int channels,
+ const float width_scale,
+ const float height_scale,
+ const int wradius,
+ const int hradius) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ const int c = index % channels;
+ const int destx = (index / channels) % out_width;
+ const int desty = (index / channels / out_width) % out_height;
+ const int n = (index / channels / out_width) / out_height;
+
+ const float srcx = ((float)destx / (float)(out_width - 1)) * (float)(in_width - 1);
+ const float srcy = ((float)desty / (float)(out_height - 1)) * (float)(in_height - 1);
+
+ const int isrcx = round(srcx);
+ const int isrcy = round(srcy);
+
+ float accum_value = 0;
+ float accum_weight = 0;
+ float accum_nan = 0;
+
+ for (int dy = -hradius; dy <= hradius; dy++) {
+ int yoff = isrcy + dy;
+ //
+ for (int dx = -wradius; dx <= wradius; dx++) {
+ int xoff = isrcx + dx;
+
+ if (xoff >= 0 && yoff >= 0 && xoff < in_width && yoff < in_height) {
+ int idx = ((n * in_height + yoff) * in_width + xoff) * channels + c;
+ float sample = input_ptr[idx];
+ float weight = fmaxf(0.0f, 1.0f - (fabsf((float)xoff - srcx) / width_scale))
+ * fmaxf(0.0f, 1.0f - (fabsf((float)yoff - srcy) / height_scale));
+ if (sample != sample) { // isnan
+ accum_nan += weight;
+ sample = 0;
+ weight = 0;
+ }
+ accum_value += sample * weight;
+ accum_weight += weight;
+ }
+ }
+ }
+
+ if (accum_nan / accum_weight > 0.5) {
+ output_ptr[index] = CUDART_NAN_F;
+ } else {
+ output_ptr[index] = accum_value / accum_weight;
+ }
+ }
+}
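+// Illustrative numbers: downsampling a 384x512 input to 96x128 gives
+// width_scale = 511/127 ~= 4.02 and height_scale = 383/95 ~= 4.03, so
+// wradius = hradius = 5 and each output pixel blends an 11x11 input
+// neighborhood using the tent weights computed above.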
+
+bool Downsample(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::Tensor output) {
+ const int batch_size = output.dimension(0);
+ const int out_height = output.dimension(1);
+ const int out_width = output.dimension(2);
+ const int out_channels = output.dimension(3);
+ const int total_count = batch_size * out_height * out_width * out_channels;
+
+ const int in_height = input.dimension(1);
+ const int in_width = input.dimension(2);
+
+ const float width_scale = (float)(in_width - 1) / (float)(out_width - 1);
+ const float height_scale = (float)(in_height - 1) / (float)(out_height - 1);
+
+ const int wradius = ceil(width_scale);
+ const int hradius = ceil(height_scale);
+
+ CudaLaunchConfig config = GetCudaLaunchConfig(total_count, device);
+ DownsampleKernel<<<config.block_count, config.thread_per_block, 0,
+ device.stream()>>>(total_count, input.data(), output.data(),
+ in_width, in_height, out_width, out_height, out_channels,
+ width_scale, height_scale, wradius, hradius);
+ return device.ok();
+}
+
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/downsample/downsample_op.cc b/Codes/flownet2/src/ops/downsample/downsample_op.cc
new file mode 100644
index 0000000..6980dc7
--- /dev/null
+++ b/Codes/flownet2/src/ops/downsample/downsample_op.cc
@@ -0,0 +1,30 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::DimensionHandle;
+
+Status SetOutputToSizedImage(InferenceContext* c) {
+ ShapeHandle input;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+ DimensionHandle batch = c->Dim(input, 0);
+ DimensionHandle depth = c->Dim(input, 3);
+ std::vector<int32> size_;
+ c->GetAttr("size", &size_);
+ DimensionHandle height = c->MakeDim(size_[0]);
+ DimensionHandle width = c->MakeDim(size_[1]);
+ c->set_output(0, c->MakeShape({batch, height, width, depth}));
+ return Status::OK();
+}
+
+REGISTER_OP("Downsample")
+ .Input("input: float32")
+ .Attr("size: list(int) >= 2")
+ .Output("output: float32")
+ .SetShapeFn(SetOutputToSizedImage);
+
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp.cc
new file mode 100644
index 0000000..b5d9602
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.cc
@@ -0,0 +1,48 @@
+#define EIGEN_USE_THREADS
+
+#include "flow_warp.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+template<typename Device>
+class FlowWarpKernel : public OpKernel {
+ public:
+ explicit FlowWarpKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {}
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input image and flow and verify dimensions
+ const Tensor& input_t = ctx->input(0);
+ const Tensor& flow_t = ctx->input(1);
+
+ OP_REQUIRES(ctx, input_t.dims() == 4,
+ errors::InvalidArgument("Input image must have rank 4"));
+ OP_REQUIRES(ctx, flow_t.dims() == 4,
+ errors::InvalidArgument("Input flow must have rank 4"));
+ OP_REQUIRES(ctx,
+ input_t.dim_size(0) == flow_t.dim_size(0) && input_t.dim_size(
+ 1) == flow_t.dim_size(1) && input_t.dim_size(2) == flow_t.dim_size(2),
+ errors::InvalidArgument(
+ "Input image and flow must have same N x H x W dimensions"));
+
+ // Allocate the memory for the output
+ Tensor *output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_t.shape(), &output_t));
+
+ // Perform the flow warping
+ auto input = input_t.tensor<float, 4>();
+ auto flow = flow_t.tensor<float, 4>();
+ auto output = output_t->tensor<float, 4>();
+
+ FlowWarp(ctx->eigen_gpu_device(), input, flow, output);
+ }
+};
+
+REGISTER_KERNEL_BUILDER(Name("FlowWarp")
+ .Device(DEVICE_GPU),
+ FlowWarpKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc
new file mode 100644
index 0000000..2007151
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc
@@ -0,0 +1,130 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "flow_warp.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+#define RA_TILE 32
+#define RA_ROWS 8
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void FlowWarpKernel(
+ const float *image,
+ const float *flow,
+ float *warped,
+ const int batch_size,
+ const int channels,
+ const int cblocks,
+ const int width,
+ const int wblocks,
+ const int height,
+ const int width_height) {
+ int y = blockIdx.y;
+ int n = blockIdx.z;
+
+ __shared__ float x2_buf[FW_TILE_X], y2_buf[FW_TILE_X];
+ __shared__ float buffer[FW_TILE_C][FW_TILE_X + 1];
+
+ int x;
+ int c;
+
+ x = blockIdx.x * FW_TILE_X + threadIdx.x;
+
+ if ((threadIdx.y == 0) && (x < width)) {
+ const int idx = ((n * height + y) * width + x) * 2;
+ x2_buf[threadIdx.x] = float(x) + flow[idx];
+ y2_buf[threadIdx.x] = float(y) + flow[idx + 1];
+ }
+
+ __syncthreads();
+
+ float x2 = x2_buf[threadIdx.y];
+ float y2 = y2_buf[threadIdx.y];
+
+ int ix2_L = int(x2);
+ int iy2_T = int(y2);
+ int ix2_R = min(ix2_L + 1, width - 1);
+ int iy2_B = min(iy2_T + 1, height - 1);
+
+ int off_TL = ((n * height + iy2_T) * width + ix2_L) * channels;
+ int off_TR = ((n * height + iy2_T) * width + ix2_R) * channels;
+ int off_BL = ((n * height + iy2_B) * width + ix2_L) * channels;
+ int off_BR = ((n * height + iy2_B) * width + ix2_R) * channels;
+
+ float alpha = x2 - ix2_L;
+ float beta = y2 - iy2_T;
+ float coeffTL = (1 - alpha) * (1 - beta);
+ float coeffTR = alpha * (1 - beta);
+ float coeffBL = (1 - alpha) * beta;
+ float coeffBR = alpha * beta;
+
+ for (int cb = 0; cb < cblocks; cb++) {
+ __syncthreads();
+
+ buffer[threadIdx.y][threadIdx.x] = 0.0;
+
+ __syncthreads();
+
+ c = cb * FW_TILE_C + threadIdx.x;
+
+ if ((x2 >= 0) && (y2 >= 0) && (x2 < width) && (y2 < height) && (c < channels)) {
+ buffer[threadIdx.y][threadIdx.x] = // buffer [x][c]
+ coeffTL * image[off_TL + c] +
+ coeffTR * image[off_TR + c] +
+ coeffBL * image[off_BL + c] +
+ coeffBR * image[off_BR + c];
+ }
+
+ __syncthreads();
+
+ c = cb * FW_TILE_C + threadIdx.y;
+ x = blockIdx.x * FW_TILE_X + threadIdx.x;
+
+ if ((c < channels) && (x < width)) {
+ warped[((n * height + y) * width + x) * channels + c] = buffer[threadIdx.x][threadIdx.y];
+ }
+ }
+}
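+// The coeffTL/TR/BL/BR values above are standard bilinear weights and sum to 1.
+// Illustrative numbers: for x2 = 10.25 and y2 = 3.5, alpha = 0.25 and beta = 0.5,
+// giving weights TL = 0.375, TR = 0.125, BL = 0.375 and BR = 0.125.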
+
+void FlowWarp(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::Tensor output) {
+ const int batch_size = input.dimension(0);
+ const int height = input.dimension(1);
+ const int width = input.dimension(2);
+ const int channels = input.dimension(3);
+
+ const int width_height = width * height;
+ int wblocks = ((width - 1) / FW_TILE_X + 1);
+ int cblocks = ((channels - 1) / FW_TILE_C + 1);
+ dim3 warpThreads(FW_TILE_X, FW_TILE_C);
+ dim3 warpBlocks(wblocks, height, batch_size);
+
+ cudaMemset(output.data(), 0, batch_size * height * width * channels * sizeof(float));
+
+ FlowWarpKernel << < warpBlocks, warpThreads, 0, device.stream() >> > (
+ input.data(),
+ flow.data(),
+ output.data(),
+ batch_size,
+ channels,
+ cblocks,
+ width,
+ wblocks,
+ height,
+ width_height);
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.h b/Codes/flownet2/src/ops/flow_warp/flow_warp.h
new file mode 100644
index 0000000..2780316
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.h
@@ -0,0 +1,28 @@
+#ifndef FLOWNET_FLOWWARP_H_
+#define FLOWNET_FLOWWARP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+#define FW_THREADS 32
+#define FW_TILE_X FW_THREADS
+#define FW_TILE_C FW_THREADS
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+void FlowWarp(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor input,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::Tensor output);
+
+void FlowWarpGrad(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor image,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::ConstTensor gradient,
+ typename TTypes<float, 4>::Tensor image_grad,
+ typename TTypes<float, 4>::Tensor flow_grad);
+} // end namespace tensorflow
+
+#endif // FLOWNET_FLOWWARP_H_
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc
new file mode 100644
index 0000000..9f3e7ea
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc
@@ -0,0 +1,57 @@
+#define EIGEN_USE_THREADS
+
+#include "flow_warp.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+template<typename Device>
+class FlowWarpGradKernel : public OpKernel {
+ public:
+ explicit FlowWarpGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {}
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input image and flow and verify dimensions
+ const Tensor& image_t = ctx->input(0);
+ const Tensor& flow_t = ctx->input(1);
+ const Tensor& grad_t = ctx->input(2);
+
+ OP_REQUIRES(ctx, image_t.dims() == 4,
+ errors::InvalidArgument("Input image must have rank 4"));
+ OP_REQUIRES(ctx, flow_t.dims() == 4,
+ errors::InvalidArgument("Input flow must have rank 4"));
+ OP_REQUIRES(ctx,
+ image_t.dim_size(0) == flow_t.dim_size(0) && image_t.dim_size(
+ 1) == flow_t.dim_size(1) && image_t.dim_size(2) == flow_t.dim_size(2),
+ errors::InvalidArgument(
+ "Input image and flow must have same N x H x W dimensions"));
+
+ // Allocate the memory for the output
+ Tensor *image_grad_t;
+ Tensor *flow_grad_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, image_t.shape(), &image_grad_t));
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(1, flow_t.shape(), &flow_grad_t));
+
+ auto image = image_t.tensor<float, 4>();
+ auto flow = flow_t.tensor<float, 4>();
+ auto gradient = grad_t.tensor<float, 4>();
+ auto image_grad = image_grad_t->tensor<float, 4>();
+ auto flow_grad = flow_grad_t->tensor<float, 4>();
+
+ FlowWarpGrad(ctx->eigen_gpu_device(),
+ image,
+ flow,
+ gradient,
+ image_grad,
+ flow_grad);
+ }
+};
+
+REGISTER_KERNEL_BUILDER(Name("FlowWarpGrad")
+ .Device(DEVICE_GPU),
+ FlowWarpGradKernel<GPUDevice>)
+} // end namespace tensorflow
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc
new file mode 100644
index 0000000..25248c8
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc
@@ -0,0 +1,126 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "flow_warp.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+__global__ void FlowWarpGradKernel(
+ const float *image,
+ float *image_grad,
+ const float *flow,
+ float *flow_grad,
+ const float *gradient,
+ int batch_size,
+ int channels,
+ int cblocks,
+ int width,
+ int wblocks,
+ int height,
+ int widthheight) {
+ int x = blockIdx.x * FW_TILE_X + threadIdx.x;
+
+ if (x >= width) return;
+
+ int y = blockIdx.y;
+ int n = blockIdx.z;
+
+ const int flow_idx = ((n * height + y) * width + x) * 2;
+ float x2 = float(x) + flow[flow_idx];
+ float y2 = float(y) + flow[flow_idx + 1];
+
+ if ((x2 >= 0.f) && (y2 >= 0.f) && (x2 < width) && (y2 < height)) {
+ int ix2_L = int(x2);
+ int iy2_T = int(y2);
+ int ix2_R = min(ix2_L + 1, width - 1);
+ int iy2_B = min(iy2_T + 1, height - 1);
+
+ float alpha = x2 - ix2_L;
+ float beta = y2 - iy2_T;
+
+ for (int c = 0; c < channels; c++) {
+ float warped_diff_value = gradient[((n * height + y) * width + x) * channels + c];
+ atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_L) * channels + c],
+ warped_diff_value * (1 - alpha) * (1 - beta));
+ atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_R) * channels + c],
+ warped_diff_value * alpha * (1 - beta));
+ atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_L) * channels + c],
+ warped_diff_value * (1 - alpha) * beta);
+ atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_R) * channels + c],
+ warped_diff_value * alpha * beta);
+ }
+
+ float gamma = iy2_B - y2;
+ float bot_diff = 0;
+
+ for (int c = 0; c < channels; c++) {
+ int ch_off = (n * channels + c) * height;
+ float temp = 0;
+ temp += gamma *
+ (image[((n * height + iy2_T) * width + ix2_R) * channels + c] -
+ image[((n * height + iy2_T) * width + ix2_L) * channels + c]);
+ temp += (1 - gamma) *
+ (image[((n * height + iy2_B) * width + ix2_R) * channels + c] -
+ image[((n * height + iy2_B) * width + ix2_L) * channels + c]);
+
+ bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp;
+ }
+ flow_grad[((n * height + y) * width + x) * 2] = bot_diff;
+
+ gamma = ix2_R - x2;
+ bot_diff = 0;
+
+ for (int c = 0; c < channels; c++) {
+ float temp = 0;
+ temp += gamma *
+ (image[((n * height + iy2_B) * width + ix2_L) * channels + c] -
+ image[((n * height + iy2_T) * width + ix2_L) * channels + c]);
+ temp += (1 - gamma) *
+ (image[((n * height + iy2_B) * width + ix2_R) * channels + c] -
+ image[((n * height + iy2_T) * width + ix2_R) * channels + c]);
+
+ bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp;
+ }
+ flow_grad[((n * height + y) * width + x) * 2 + 1] = bot_diff;
+ }
+}
+
+void FlowWarpGrad(const GPUDevice& device,
+ typename TTypes<float, 4>::ConstTensor image,
+ typename TTypes<float, 4>::ConstTensor flow,
+ typename TTypes<float, 4>::ConstTensor gradient,
+ typename TTypes<float, 4>::Tensor image_grad,
+ typename TTypes<float, 4>::Tensor flow_grad) {
+ const int batch_size = image.dimension(0);
+ const int height = image.dimension(1);
+ const int width = image.dimension(2);
+ const int channels = image.dimension(3);
+ const int width_height = width * height;
+
+ int wblocks = ((width - 1) / FW_TILE_X + 1);
+ int cblocks = ((channels - 1) / FW_TILE_C + 1);
+ dim3 warpThreads(FW_TILE_X, 1);
+ dim3 warpBlocks(wblocks, height, batch_size);
+
+ cudaMemset(image_grad.data(), 0, batch_size * height * width * channels * sizeof(float));
+ cudaMemset(flow_grad.data(), 0, batch_size * height * width * 2 * sizeof(float));
+
+ FlowWarpGradKernel << < warpBlocks, warpThreads, 0, device.stream() >> > (
+ image.data(),
+ image_grad.data(),
+ flow.data(),
+ flow_grad.data(),
+ gradient.data(),
+ batch_size,
+ channels,
+ cblocks,
+ width,
+ wblocks,
+ height,
+ width_height);
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc
new file mode 100644
index 0000000..aef9c74
--- /dev/null
+++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc
@@ -0,0 +1,23 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+REGISTER_OP("FlowWarp")
+.Input("image: float32")
+.Input("flow: float32")
+.Output("output: float32")
+.SetShapeFn(::tensorflow::shape_inference::UnchangedShape);
+
+REGISTER_OP("FlowWarpGrad")
+.Input("image: float32")
+.Input("flow: float32")
+.Input("gradient: float32")
+.Output("image_grad: float32")
+.Output("flow_grad: float32")
+.SetShapeFn([](shape_inference::InferenceContext *c) {
+ c->set_output(0, c->input(0));
+ c->set_output(1, c->input(1));
+ return Status::OK();
+ });
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc
new file mode 100644
index 0000000..b93dfa6
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc
@@ -0,0 +1,420 @@
+#include "augmentation_base.h"
+
+#include <math.h>
+#include <random>
+
+namespace tensorflow {
+/** TransMat Functions **/
+void AugmentationLayerBase::TransMat::fromCoeff(AugmentationCoeff *coeff,
+ int out_width,
+ int out_height,
+ int src_width,
+ int src_height) {
+ leftMultiply(1, 0, -0.5 * out_width,
+ 0, 1, -0.5 * out_height);
+
+ if (coeff->angle) {
+ leftMultiply(cos(coeff->angle()), -sin(coeff->angle()), 0,
+ sin(coeff->angle()), cos(coeff->angle()), 0);
+ }
+
+ if (coeff->dx || coeff->dy) {
+ leftMultiply(1, 0, coeff->dx() * out_width,
+ 0, 1, coeff->dy() * out_height);
+ }
+
+ if (coeff->zoom_x || coeff->zoom_y) {
+ leftMultiply(1.0 / coeff->zoom_x(), 0, 0,
+ 0, 1.0 / coeff->zoom_y(), 0);
+ }
+
+ leftMultiply(1, 0, 0.5 * src_width,
+ 0, 1, 0.5 * src_height);
+}
+
+void AugmentationLayerBase::TransMat::fromTensor(const float *tensor_data) {
+ t0 = tensor_data[0];
+ t1 = tensor_data[1];
+ t2 = tensor_data[2];
+ t3 = tensor_data[3];
+ t4 = tensor_data[4];
+ t5 = tensor_data[5];
+}
+
+AugmentationLayerBase::TransMat AugmentationLayerBase::TransMat::inverse() {
+ float a = this->t0, b = this->t1, c = this->t2;
+ float d = this->t3, e = this->t4, f = this->t5;
+
+ float denom = a * e - b * d;
+
+ TransMat result;
+
+ result.t0 = e / denom;
+ result.t1 = b / -denom;
+ result.t2 = (c * e - b * f) / -denom;
+ result.t3 = d / -denom;
+ result.t4 = a / denom;
+ result.t5 = (c * d - a * f) / denom;
+
+ return result;
+}
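+// Illustrative check: for a pure translation (t0 = 1, t1 = 0, t2 = tx,
+// t3 = 0, t4 = 1, t5 = ty) the determinant is 1 and inverse() yields
+// (1, 0, -tx, 0, 1, -ty), i.e. the opposite translation, as expected.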
+
+void AugmentationLayerBase::TransMat::leftMultiply(float u0,
+ float u1,
+ float u2,
+ float u3,
+ float u4,
+ float u5) {
+ float t0 = this->t0, t1 = this->t1, t2 = this->t2;
+ float t3 = this->t3, t4 = this->t4, t5 = this->t5;
+
+ this->t0 = t0 * u0 + t3 * u1;
+ this->t1 = t1 * u0 + t4 * u1;
+ this->t2 = t2 * u0 + t5 * u1 + u2;
+ this->t3 = t0 * u3 + t3 * u4;
+ this->t4 = t1 * u3 + t4 * u4;
+ this->t5 = t2 * u3 + t5 * u4 + u5;
+}
+
+void AugmentationLayerBase::TransMat::toIdentity() {
+ t0 = 1; t1 = 0; t2 = 0;
+ t3 = 0; t4 = 1; t5 = 0;
+}
+
+/** AugmentationCoeff Functions **/
+void AugmentationCoeff::clear() {
+ // Spatial variables
+ dx.clear();
+ dy.clear();
+ angle.clear();
+ zoom_x.clear();
+ zoom_y.clear();
+
+ // Chromatic variables
+ gamma.clear();
+ brightness.clear();
+ contrast.clear();
+ color1.clear();
+ color2.clear();
+ color3.clear();
+}
+
+void AugmentationCoeff::combine_with(const AugmentationCoeff& coeff) {
+ // Spatial types
+ if (coeff.dx) {
+ dx = dx() * coeff.dx();
+ }
+
+ if (coeff.dy) {
+ dy = dy() * coeff.dy();
+ }
+
+ if (coeff.angle) {
+ angle = angle() * coeff.angle();
+ }
+
+ if (coeff.zoom_x) {
+ zoom_x = zoom_x() * coeff.zoom_x();
+ }
+
+ if (coeff.zoom_y) {
+ zoom_y = zoom_y() * coeff.zoom_y();
+ }
+
+ // Chromatic types
+ if (coeff.gamma) {
+ gamma = gamma() * coeff.gamma();
+ }
+
+ if (coeff.brightness) {
+ brightness = brightness() * coeff.brightness();
+ }
+
+ if (coeff.contrast) {
+ contrast = contrast() * coeff.contrast();
+ }
+
+ if (coeff.color1) {
+ color1 = color1() * coeff.color1();
+ }
+
+ if (coeff.color2) {
+ color2 = color2() * coeff.color2();
+ }
+
+ if (coeff.color3) {
+ color3 = color3() * coeff.color3();
+ }
+}
+
+void AugmentationCoeff::replace_with(const AugmentationCoeff& coeff) {
+ // Spatial types
+ if (coeff.dx) {
+ dx = coeff.dx();
+ }
+
+ if (coeff.dy) {
+ dy = coeff.dy();
+ }
+
+ if (coeff.angle) {
+ angle = coeff.angle();
+ }
+
+ if (coeff.zoom_x) {
+ zoom_x = coeff.zoom_x();
+ }
+
+ if (coeff.zoom_y) {
+ zoom_y = coeff.zoom_y();
+ }
+
+ // Chromatic types
+ if (coeff.gamma) {
+    gamma = coeff.gamma();
+ }
+
+ if (coeff.brightness) {
+ brightness = coeff.brightness();
+ }
+
+ if (coeff.contrast) {
+ contrast = coeff.contrast();
+ }
+
+ if (coeff.color1) {
+ color1 = coeff.color1();
+ }
+
+ if (coeff.color2) {
+ color2 = coeff.color2();
+ }
+
+ if (coeff.color3) {
+ color3 = coeff.color3();
+ }
+}
+
+/** AugmentationLayerBase Functions **/
+float AugmentationLayerBase::rng_generate(const AugmentationParam& param,
+ float discount_coeff,
+ const float default_value) {
+  std::random_device rd;  // Will be used to obtain a seed for the random number engine
+  std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd()
+
+ float spread = param.spread * discount_coeff;
+
+ if (param.rand_type == "uniform_bernoulli") {
+ float tmp1 = 0.0;
+ bool tmp2 = false;
+
+ if (param.prob > 0.0) {
+ std::bernoulli_distribution bernoulli(param.prob);
+ tmp2 = bernoulli(gen);
+ }
+
+ if (!tmp2) {
+ return default_value;
+ }
+
+ if (param.spread > 0.0) {
+ std::uniform_real_distribution<> uniform(param.mean - spread,
+ param.mean + spread);
+ tmp1 = uniform(gen);
+ } else {
+ tmp1 = param.mean;
+ }
+
+ if (param.should_exp) {
+ tmp1 = exp(tmp1);
+ }
+
+ return tmp1;
+ } else if (param.rand_type == "gaussian_bernoulli") {
+ float tmp1 = 0.0;
+ bool tmp2 = false;
+
+ if (param.prob > 0.0) {
+ std::bernoulli_distribution bernoulli(param.prob);
+ tmp2 = bernoulli(gen);
+ }
+
+ if (!tmp2) {
+ return default_value;
+ }
+
+ if (spread > 0.0) {
+ std::normal_distribution<> normal(param.mean, spread);
+ tmp1 = normal(gen);
+ } else {
+ tmp1 = param.mean;
+ }
+
+ if (param.should_exp) {
+ tmp1 = exp(tmp1);
+ }
+
+ return tmp1;
+ } else {
+ throw "Unknown random type: " + param.rand_type;
+ }
+}
+
+void AugmentationLayerBase::generate_chromatic_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff) {
+ if (aug.gamma) {
+ coeff.gamma = rng_generate(aug.gamma(), discount_coeff, coeff.gamma.get_default());
+ }
+
+ if (aug.brightness) {
+ coeff.brightness =
+ rng_generate(aug.brightness(), discount_coeff, coeff.brightness.get_default());
+ }
+
+ if (aug.contrast) {
+ coeff.contrast = rng_generate(aug.contrast(), discount_coeff, coeff.contrast.get_default());
+ }
+
+ if (aug.color) {
+ coeff.color1 = rng_generate(aug.color(), discount_coeff, coeff.color1.get_default());
+ coeff.color2 = rng_generate(aug.color(), discount_coeff, coeff.color2.get_default());
+ coeff.color3 = rng_generate(aug.color(), discount_coeff, coeff.color3.get_default());
+ }
+}
+
+void AugmentationLayerBase::generate_spatial_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff) {
+ if (aug.translate) {
+ coeff.dx = rng_generate(aug.translate(), discount_coeff, coeff.dx.get_default());
+ coeff.dy = rng_generate(aug.translate(), discount_coeff, coeff.dy.get_default());
+ }
+
+ if (aug.rotate) {
+ coeff.angle = rng_generate(aug.rotate(), discount_coeff, coeff.angle.get_default());
+ }
+
+ if (aug.zoom) {
+ coeff.zoom_x = rng_generate(aug.zoom(), discount_coeff, coeff.zoom_x.get_default());
+ coeff.zoom_y = coeff.zoom_x();
+ }
+
+ if (aug.squeeze) {
+ float squeeze_coeff = rng_generate(aug.squeeze(), discount_coeff, 1.0);
+ coeff.zoom_x = coeff.zoom_x() * squeeze_coeff;
+ coeff.zoom_y = coeff.zoom_y() * squeeze_coeff;
+ }
+}
+
+void AugmentationLayerBase::generate_valid_spatial_coeffs(
+ float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff,
+ int src_width,
+ int src_height,
+ int out_width,
+ int out_height) {
+ int x, y;
+ float x1, y1, x2, y2;
+ int counter = 0;
+ int good_params = 0;
+ AugmentationCoeff incoming_coeff(coeff);
+
+ while (good_params < 4 && counter < 50) {
+ coeff.clear();
+ AugmentationLayerBase::generate_spatial_coeffs(discount_coeff, aug, coeff);
+ coeff.combine_with(incoming_coeff);
+
+ // Check if all 4 corners of the transformed image fit into the original
+ // image
+ good_params = 0;
+
+ for (x = 0; x < out_width; x += out_width - 1) {
+ for (y = 0; y < out_height; y += out_height - 1) {
+ // move the origin
+ x1 = x - 0.5 * out_width;
+ y1 = y - 0.5 * out_height;
+
+ // rotate
+ x2 = cos(coeff.angle()) * x1 - sin(coeff.angle()) * y1;
+        y2 = sin(coeff.angle()) * x1 + cos(coeff.angle()) * y1;
+
+ // translate
+ x2 = x2 + coeff.dx() * out_width;
+ y2 = y2 + coeff.dy() * out_height;
+
+ // zoom
+ x2 = x2 / coeff.zoom_x();
+ y2 = y2 / coeff.zoom_y();
+
+ // move the origin back
+ x2 = x2 + 0.5 * src_width;
+ y2 = y2 + 0.5 * src_height;
+
+ if (!((floor(x2) < 0) || (floor(x2) > src_width - 2.0) ||
+ (floor(y2) < 0) || (floor(y2) > src_height - 2.0))) {
+ good_params++;
+ }
+ }
+ }
+ counter++;
+ }
+
+ if (counter >= 50) {
+ printf("Warning: No suitable spatial transformation after %d attempts.\n", counter);
+ coeff.clear();
+ coeff.replace_with(incoming_coeff);
+ }
+}
+
+void AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(
+ const std::vector<AugmentationCoeff>& coeff_arr,
+ typename TTypes<float, 2>::Tensor& out)
+{
+ float *out_ptr = out.data();
+ int counter = 0;
+
+ for (AugmentationCoeff coeff : coeff_arr) {
+ out_ptr[counter + 0] = coeff.gamma();
+ out_ptr[counter + 1] = coeff.brightness();
+ out_ptr[counter + 2] = coeff.contrast();
+ out_ptr[counter + 3] = coeff.color1();
+ out_ptr[counter + 4] = coeff.color2();
+ out_ptr[counter + 5] = coeff.color3();
+ counter += 6;
+ }
+}
+
+void AugmentationLayerBase::copy_spatial_coeffs_to_tensor(
+ const std::vector<AugmentationCoeff>& coeff_arr,
+ const int out_width,
+ const int out_height,
+ const int src_width,
+ const int src_height,
+ typename TTypes<float, 2>::Tensor& out,
+ const bool invert)
+{
+ float *out_ptr = out.data();
+ int counter = 0;
+ TransMat t;
+
+ for (AugmentationCoeff coeff : coeff_arr) {
+ t.toIdentity();
+ t.fromCoeff(&coeff, out_width, out_height, src_width, src_height);
+
+ if (invert) {
+ t = t.inverse();
+ }
+
+ out_ptr[counter + 0] = t.t0;
+ out_ptr[counter + 1] = t.t1;
+ out_ptr[counter + 2] = t.t2;
+ out_ptr[counter + 3] = t.t3;
+ out_ptr[counter + 4] = t.t4;
+ out_ptr[counter + 5] = t.t5;
+ counter += 6;
+ }
+}
+}
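rng_generate draws one augmentation coefficient in two stages: a Bernoulli gate with probability prob decides whether the parameter is perturbed at all, then a uniform or Gaussian draw around mean with half-width / standard deviation spread * discount_coeff, optionally exponentiated. A NumPy sketch of the same sampling scheme, handy for checking parameter settings outside the op (the function name and defaults here are illustrative, not part of this commit):

    import numpy as np

    def rng_generate(rand_type, prob, mean, spread, should_exp,
                     discount_coeff=1.0, default_value=0.0, rng=np.random):
        """Mirror of AugmentationLayerBase::rng_generate for a single coefficient."""
        if prob <= 0.0 or rng.random_sample() >= prob:
            return default_value                       # Bernoulli gate failed: keep the default
        s = spread * discount_coeff
        if rand_type == 'uniform_bernoulli':
            value = rng.uniform(mean - s, mean + s) if spread > 0.0 else mean
        elif rand_type == 'gaussian_bernoulli':
            value = rng.normal(mean, s) if s > 0.0 else mean
        else:
            raise ValueError('Unknown random type: %s' % rand_type)
        return np.exp(value) if should_exp else value

    # Example: a zoom factor drawn log-uniformly from [exp(-0.4), exp(0.4)]
    print(rng_generate('uniform_bernoulli', 1.0, 0.0, 0.4, True))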
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h
new file mode 100644
index 0000000..d2aba2c
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h
@@ -0,0 +1,228 @@
+#ifndef AUGMENTATION_LAYER_BASE_H_
+#define AUGMENTATION_LAYER_BASE_H_
+
+#include "tensorflow/core/framework/tensor_types.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace tensorflow {
+template<typename T>
+class OptionalType {
+ public:
+ OptionalType(const T default_value) : default_value(default_value), has_value(false) {}
+
+ operator bool() const {
+ return has_value;
+ }
+
+ OptionalType& operator=(T val) {
+ has_value = true;
+ value = val;
+ return *this;
+ }
+
+ const T operator()() const {
+ return has_value ? value : default_value;
+ }
+
+ void clear() {
+ has_value = false;
+ }
+
+ const T get_default() {
+ return default_value;
+ }
+
+ private:
+ T value;
+ bool has_value;
+ const T default_value;
+};
+
+class AugmentationCoeff {
+ public:
+ // Spatial Types
+ OptionalType<float>dx;
+ OptionalType<float>dy;
+ OptionalType<float>angle;
+ OptionalType<float>zoom_x;
+ OptionalType<float>zoom_y;
+
+ // Chromatic Types
+ OptionalType<float>gamma;
+ OptionalType<float>brightness;
+ OptionalType<float>contrast;
+ OptionalType<float>color1;
+ OptionalType<float>color2;
+ OptionalType<float>color3;
+
+ AugmentationCoeff() : dx(0.0), dy(0.0), angle(0.0), zoom_x(1.0), zoom_y(1.0), gamma(1.0),
+ brightness(0.0), contrast(1.0), color1(1.0), color2(1.0), color3(1.0) {}
+
+ AugmentationCoeff(const AugmentationCoeff& coeff) : AugmentationCoeff() {
+ replace_with(coeff);
+ }
+
+ void clear();
+
+ void combine_with(const AugmentationCoeff& coeff);
+
+ void replace_with(const AugmentationCoeff& coeff);
+};
+
+typedef struct AugmentationParam {
+ std::string rand_type;
+ bool should_exp;
+ float mean;
+ float spread;
+ float prob;
+} AugmentationParam;
+
+class AugmentationParams {
+ public:
+ int crop_height;
+ int crop_width;
+
+ // Spatial options
+ OptionalType<struct AugmentationParam>translate;
+ OptionalType<struct AugmentationParam>rotate;
+ OptionalType<struct AugmentationParam>zoom;
+ OptionalType<struct AugmentationParam>squeeze;
+
+ // Chromatic options
+ OptionalType<struct AugmentationParam>gamma;
+ OptionalType<struct AugmentationParam>brightness;
+ OptionalType<struct AugmentationParam>contrast;
+ OptionalType<struct AugmentationParam>color;
+
+ inline AugmentationParams(int crop_height,
+ int crop_width,
+ std::vector<std::string>params_name,
+ std::vector<std::string>params_rand_type,
+ std::vector<bool> params_exp,
+ std::vector<float> params_mean,
+ std::vector<float> params_spread,
+ std::vector<float> params_prob) :
+ crop_height(crop_height),
+ crop_width(crop_width),
+ translate(AugmentationParam()),
+ rotate(AugmentationParam()),
+ zoom(AugmentationParam()),
+ squeeze(AugmentationParam()),
+ gamma(AugmentationParam()),
+ brightness(AugmentationParam()),
+ contrast(AugmentationParam()),
+ color(AugmentationParam()) {
+ for (int i = 0; i < params_name.size(); i++) {
+ const std::string name = params_name[i];
+ const std::string rand_type = params_rand_type[i];
+ const bool should_exp = params_exp[i];
+ const float mean = params_mean[i];
+ const float spread = params_spread[i];
+ const float prob = params_prob[i];
+
+ struct AugmentationParam param = { rand_type, should_exp, mean, spread, prob };
+
+ if (name == "translate") {
+ this->translate = param;
+ } else if (name == "rotate") {
+ this->rotate = param;
+ } else if (name == "zoom") {
+ this->zoom = param;
+ } else if (name == "squeeze") {
+ this->squeeze = param;
+ } else if (name == "noise") {
+ // NoOp: We handle noise on the Python side
+ } else if (name == "gamma") {
+ this->gamma = param;
+ } else if (name == "brightness") {
+ this->brightness = param;
+ } else if (name == "contrast") {
+ this->contrast = param;
+ } else if (name == "color") {
+ this->color = param;
+ } else {
+ std::cout << "Ignoring unknown augmentation parameter: " << name << std::endl;
+ }
+ }
+ }
+
+ bool should_do_spatial_transform() {
+ return this->translate || this->rotate || this->zoom || this->squeeze;
+ }
+
+ bool should_do_chromatic_transform() {
+ return this->gamma || this->brightness || this->contrast || this->color;
+ }
+};
+
+class AugmentationLayerBase {
+ public:
+ class TransMat {
+    /**
+     * 2x3 affine transformation matrix for spatial augmentation,
+     * with entries indexed as
+     * | t0 t1 t2 |
+     * | t3 t4 t5 |
+     */
+
+ public:
+ float t0, t1, t2;
+ float t3, t4, t5;
+
+
+ void fromCoeff(AugmentationCoeff *coeff,
+ int out_width,
+ int out_height,
+ int src_width,
+ int src_height);
+
+ void fromTensor(const float *tensor_data);
+
+ TransMat inverse();
+
+ void leftMultiply(float u0,
+ float u1,
+ float u2,
+ float u3,
+ float u4,
+ float u5);
+
+ void toIdentity();
+ };
+
+ // TODO: Class ChromaticCoeffs
+
+ static float rng_generate(const AugmentationParam& param,
+ float discount_coeff,
+ const float default_value);
+
+ static void clear_spatial_coeffs(AugmentationCoeff& coeff);
+ static void generate_chromatic_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff);
+ static void generate_spatial_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff);
+ static void generate_valid_spatial_coeffs(float discount_coeff,
+ const AugmentationParams& aug,
+ AugmentationCoeff & coeff,
+ int src_width,
+ int src_height,
+ int out_width,
+ int out_height);
+
+ static void copy_chromatic_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr,
+ typename TTypes<float, 2>::Tensor& out);
+ static void copy_spatial_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr,
+ const int out_width,
+ const int out_height,
+ const int src_width,
+ const int src_height,
+ typename TTypes<float, 2>::Tensor& out,
+ const bool invert = false);
+};
+} // namespace tensorflow
+
+#endif // AUGMENTATION_LAYER_BASE_H_
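TransMat stores a 2x3 affine transform and leftMultiply composes a new transform on the left, i.e. it replaces T with U * T in homogeneous coordinates, which is how fromCoeff chains its translate / rotate / zoom steps. A NumPy check of that convention (illustrative only; nothing here is part of the build):

    import numpy as np

    def to_3x3(t):
        """[t0..t5] -> homogeneous matrix | t0 t1 t2 ; t3 t4 t5 ; 0 0 1 |."""
        t0, t1, t2, t3, t4, t5 = t
        return np.array([[t0, t1, t2], [t3, t4, t5], [0.0, 0.0, 1.0]])

    def left_multiply(t, u):
        """Same update as TransMat::leftMultiply: T becomes U * T (U applied after T)."""
        return (to_3x3(u) @ to_3x3(t))[:2, :].reshape(-1)

    # Translate by (-w/2, -h/2), rotate by 30 degrees, translate back:
    # a rotation about the image centre, as in TransMat::fromCoeff.
    w = h = 100.0
    a = np.deg2rad(30.0)
    t = [1, 0, -0.5 * w, 0, 1, -0.5 * h]
    t = left_multiply(t, [np.cos(a), -np.sin(a), 0, np.sin(a), np.cos(a), 0])
    t = left_multiply(t, [1, 0, 0.5 * w, 0, 1, 0.5 * h])

    # The centre pixel is a fixed point of this transform.
    print(to_3x3(t) @ np.array([0.5 * w, 0.5 * h, 1.0]))  # ~[50, 50, 1]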
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc
new file mode 100644
index 0000000..77b8c83
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc
@@ -0,0 +1,461 @@
+#define EIGEN_USE_THREADS
+
+#include <algorithm>
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include "augmentation_base.h"
+#include "data_augmentation.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+inline float clamp(float f, float a, float b) {
+ return fmaxf(a, fminf(f, b));
+}
+
+template<>
+void Augment(OpKernelContext *context,
+ const CPUDevice& d,
+ const int batch_size,
+ const int channels,
+ const int src_width,
+ const int src_height,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats,
+ float *chromatic_coeffs) {
+  const int64 channel_count = batch_size * out_height * out_width;  // one work item per output pixel
+ const int kCostPerChannel = 10;
+ const DeviceBase::CpuWorkerThreads& worker_threads =
+ *context->device()->tensorflow_cpu_worker_threads();
+
+ Shard(worker_threads.num_threads,
+ worker_threads.workers,
+ channel_count,
+ kCostPerChannel,
+ [batch_size, channels, src_width,
+ src_height, src_count, out_width, out_height, src_data,
+ out_data, transMats, chromatic_coeffs](
+ int64 start_channel, int64 end_channel) {
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ for (int index = start_channel; index < end_channel; index++) {
+ int x = index % out_width;
+ int y = (index / out_width) % out_height;
+ int n = index / out_width / out_height;
+
+ const float *transMat = transMats + n * 6;
+
+ float gamma, brightness, contrast;
+
+ if (chromatic_coeffs) {
+ gamma = chromatic_coeffs[n * 6 + 0];
+ brightness = chromatic_coeffs[n * 6 + 1];
+ contrast = chromatic_coeffs[n * 6 + 2];
+ }
+
+ float xpos = x * transMat[0] + y * transMat[1] + transMat[2];
+ float ypos = x * transMat[3] + y * transMat[4] + transMat[5];
+
+ xpos = clamp(xpos, 0.0f, (float)(src_width) - 1.05f);
+ ypos = clamp(ypos, 0.0f, (float)(src_height) - 1.05f);
+
+ float tlx = floor(xpos);
+ float tly = floor(ypos);
+
+ float xdist = xpos - tlx;
+ float ydist = ypos - tly;
+
+ int srcTLIdxOffset = ((n * src_height + (int)tly) * src_width + (int)tlx) * channels;
+
+ // ((n * src_height + tly) * src_width + (tlx + 1)) * channels
+ int srcTRIdxOffset = srcTLIdxOffset + channels;
+
+ // ((n * src_height + (tly + 1)) * src_width + tlx) * channels
+ int srcBLIdxOffset = srcTLIdxOffset + channels * src_width;
+
+ // ((n * src_height + (tly + 1)) * src_width + (tlx + 1)) * channels
+ int srcBRIdxOffset = srcTLIdxOffset + channels + channels * src_width;
+
+ // Variables for chromatic transform
+ int data_index[3];
+ float rgb[3];
+ float mean_in = 0;
+ float mean_out = 0;
+
+ for (int c = 0; c < channels; c++) {
+ // Bilinear interpolation
+ int srcTLIdx = srcTLIdxOffset + c;
+ int srcTRIdx = std::min(srcTRIdxOffset + c, src_count);
+ int srcBLIdx = std::min(srcBLIdxOffset + c, src_count);
+ int srcBRIdx = std::min(srcBRIdxOffset + c, src_count);
+
+ float dest = (1 - xdist) * (1 - ydist) * src_data[srcTLIdx]
+ + (xdist) * (ydist) * src_data[srcBRIdx]
+ + (1 - xdist) * (ydist) * src_data[srcBLIdx]
+ + (xdist) * (1 - ydist) * src_data[srcTRIdx];
+
+ if (chromatic_coeffs) {
+ // Gather data for chromatic transform
+ data_index[c] = index * channels + c;
+ rgb[c] = dest;
+ mean_in += rgb[c];
+
+ // Note: coeff[3] == color1, coeff[4] == color2, ...
+ rgb[c] *= chromatic_coeffs[n * 6 + (3 + c)];
+
+ mean_out += rgb[c];
+ } else {
+ out_data[index * channels + c] = dest;
+ }
+ }
+
+        if (chromatic_coeffs) {
+          // Chromatic transformation: rescale so the colour multipliers preserve the
+          // mean brightness, then apply gamma, brightness and contrast changes.
+          float brightness_coeff = mean_in / (mean_out + 0.01f);
+ for (int c = 0; c < channels; c++) {
+ // compensate brightness
+ rgb[c] = clamp(rgb[c] * brightness_coeff, 0.0f, 1.0f);
+
+ // gamma change
+ rgb[c] = pow(rgb[c], gamma);
+
+ // brightness change
+ rgb[c] = rgb[c] + brightness;
+
+ // contrast change
+ rgb[c] = 0.5f + (rgb[c] - 0.5f) * contrast;
+
+ out_data[data_index[c]] = clamp(rgb[c], 0.0f, 1.0f);
+ }
+ }
+ }
+ });
+}
+
+template<typename Device>
+class DataAugmentation : public OpKernel {
+ public:
+ explicit DataAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the crop [height, width] tensor and verify its dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
+ OP_REQUIRES(ctx, crop_.size() == 2,
+ errors::InvalidArgument("crop must be 2 dimensions"));
+
+ // TODO: Verify params are all the same length
+
+ // Get the tensors for params_a and verify their dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_name", &params_a_name_));
+ OP_REQUIRES_OK(ctx,
+ ctx->GetAttr("params_a_rand_type", &params_a_rand_type_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_exp", &params_a_exp_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_mean", &params_a_mean_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_spread", &params_a_spread_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_prob", &params_a_prob_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_coeff_schedule", &params_a_coeff_schedule_));
+
+ // Get the tensors for params_b and verify their dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_name", &params_b_name_));
+ OP_REQUIRES_OK(ctx,
+ ctx->GetAttr("params_b_rand_type", &params_b_rand_type_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_exp", &params_b_exp_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_mean", &params_b_mean_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_spread", &params_b_spread_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_prob", &params_b_prob_));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_coeff_schedule", &params_b_coeff_schedule_));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+ // Get the input images
+ const Tensor& input_a_t = ctx->input(0);
+ const Tensor& input_b_t = ctx->input(1);
+
+ // Get the global step value
+ const Tensor& global_step_t = ctx->input(2);
+ auto global_step_eigen = global_step_t.tensor<int64, 0>();
+ const int64 global_step = global_step_eigen.data()[0];
+
+ // Dimension constants
+ const int batch_size = input_a_t.dim_size(0);
+ const int src_height = input_a_t.dim_size(1);
+ const int src_width = input_a_t.dim_size(2);
+ const int channels = input_a_t.dim_size(3);
+ const int src_count = batch_size * src_height * src_width * channels;
+ const int out_height = crop_[0];
+ const int out_width = crop_[1];
+ const int out_count = batch_size * out_height * out_width * channels;
+
+ // All tensors for this op
+ Tensor chromatic_coeffs_a_t;
+ Tensor chromatic_coeffs_b_t;
+
+ // Allocate the memory for the output images
+ Tensor *output_a_t;
+ Tensor *output_b_t;
+
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(0, TensorShape({ batch_size, crop_[0], crop_[1],
+ channels }), &output_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(1, TensorShape({ batch_size, crop_[0], crop_[1],
+ channels }), &output_b_t));
+
+ // Allocate the memory for the output spatial transforms
+ Tensor *spat_transform_a_t;
+ Tensor *spat_transform_b_t;
+
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(2, TensorShape({ batch_size, 6 }),
+ &spat_transform_a_t));
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_output(3, TensorShape({ batch_size, 6 }),
+ &spat_transform_b_t));
+
+ // Compute discount for coefficients if using a schedule
+ float discount_coeff_a = 1.0;
+ float discount_coeff_b = 1.0;
+
+ if (params_a_coeff_schedule_.size() == 3) {
+ float half_life = params_a_coeff_schedule_[0];
+ float initial_coeff = params_a_coeff_schedule_[1];
+ float final_coeff = params_a_coeff_schedule_[2];
+ discount_coeff_a = initial_coeff + (final_coeff - initial_coeff) *
+ (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0);
+ }
+
+ if (params_b_coeff_schedule_.size() == 3) {
+ if (params_a_coeff_schedule_.size() == 3) {
+ discount_coeff_b = discount_coeff_a;
+ } else {
+ float half_life = params_b_coeff_schedule_[0];
+ float initial_coeff = params_b_coeff_schedule_[1];
+ float final_coeff = params_b_coeff_schedule_[2];
+ discount_coeff_b = initial_coeff + (final_coeff - initial_coeff) *
+ (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0);
+ }
+ }
+
+ /*** BEGIN AUGMENTATION TO IMAGE A ***/
+ auto input_a = input_a_t.tensor<float, 4>();
+ auto output_a = output_a_t->tensor<float, 4>();
+
+ // Load augmentation parameters for image A
+ AugmentationParams aug_a = AugmentationParams(out_height, out_width,
+ params_a_name_,
+ params_a_rand_type_,
+ params_a_exp_,
+ params_a_mean_,
+ params_a_spread_,
+ params_a_prob_);
+
+ std::vector<AugmentationCoeff> coeffs_a;
+
+
+ bool gen_spatial_transform = aug_a.should_do_spatial_transform();
+ bool gen_chromatic_transform = aug_a.should_do_chromatic_transform();
+
+ for (int n = 0; n < batch_size; n++) {
+ AugmentationCoeff coeff;
+
+ if (gen_spatial_transform) {
+ AugmentationLayerBase::generate_valid_spatial_coeffs(discount_coeff_a, aug_a, coeff,
+ src_width, src_height,
+ out_width, out_height);
+ }
+
+ if (gen_chromatic_transform) {
+ AugmentationLayerBase::generate_chromatic_coeffs(discount_coeff_a, aug_a, coeff);
+ }
+
+ coeffs_a.push_back(coeff);
+ }
+
+ // Copy spatial coefficients A to the output Tensor on the CPU
+ // (output for FlowAugmentation)
+ auto spat_transform_a = spat_transform_a_t->tensor<float, 2>();
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a,
+ out_width, out_height,
+ src_width, src_height,
+ spat_transform_a);
+
+ float *chromatic_coeffs_a_data = NULL;
+
+ if (gen_chromatic_transform) {
+ // Allocate a temporary tensor to hold the chromatic coefficients
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, 6 }),
+ &chromatic_coeffs_a_t));
+
+ // Copy the chromatic coefficients A to a temporary Tensor on the CPU
+ auto chromatic_coeffs_a = chromatic_coeffs_a_t.tensor<float, 2>();
+ AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(coeffs_a, chromatic_coeffs_a);
+ chromatic_coeffs_a_data = chromatic_coeffs_a.data();
+ }
+
+ // Perform augmentation either on CPU or GPU
+ Augment<Device>(
+ ctx,
+ ctx->eigen_device<Device>(),
+ batch_size,
+ channels,
+ src_width,
+ src_height,
+ src_count,
+ out_width,
+ out_height,
+ input_a.data(),
+ output_a.data(),
+ spat_transform_a.data(),
+ chromatic_coeffs_a_data);
+
+ /*** END AUGMENTATION TO IMAGE A ***/
+
+ /*** BEGIN GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+ AugmentationParams aug_b = AugmentationParams(out_height, out_width,
+ params_b_name_,
+ params_b_rand_type_,
+ params_b_exp_,
+ params_b_mean_,
+ params_b_spread_,
+ params_b_prob_);
+
+ std::vector<AugmentationCoeff> coeffs_b;
+
+ bool gen_spatial_transform_b = aug_b.should_do_spatial_transform();
+ bool gen_chromatic_transform_b = aug_b.should_do_chromatic_transform();
+
+ for (int n = 0; n < batch_size; n++) {
+ AugmentationCoeff coeff(coeffs_a[n]);
+
+ // If we did a spatial transform on image A, we need to do the same one
+ // (+ possibly more) on image B
+ if (gen_spatial_transform_b) {
+ AugmentationLayerBase::generate_valid_spatial_coeffs(discount_coeff_b, aug_b, coeff,
+ src_width, src_height,
+ out_width, out_height);
+ }
+
+ if (gen_chromatic_transform_b) {
+ AugmentationLayerBase::generate_chromatic_coeffs(discount_coeff_b, aug_b, coeff);
+ }
+
+ coeffs_b.push_back(coeff);
+ }
+
+ /*** END GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+
+ /*** BEGIN AUGMENTATION TO IMAGE B ***/
+ auto input_b = input_b_t.tensor<float, 4>();
+ auto output_b = output_b_t->tensor<float, 4>();
+
+ // Copy spatial coefficients B to the output Tensor on the CPU
+ auto spat_transform_b = spat_transform_b_t->tensor<float, 2>();
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+ out_width, out_height,
+ src_width, src_height,
+ spat_transform_b);
+
+ float *chromatic_coeffs_b_data = NULL;
+
+ if (gen_chromatic_transform || gen_chromatic_transform_b) {
+ // Allocate a temporary tensor to hold the chromatic coefficients
+ tensorflow::AllocatorAttributes pinned_allocator;
+ pinned_allocator.set_on_host(true);
+ pinned_allocator.set_gpu_compatible(true);
+ OP_REQUIRES_OK(ctx,
+ ctx->allocate_temp(DataTypeToEnum<float>::value,
+ TensorShape({ batch_size, 6 }),
+ &chromatic_coeffs_b_t, pinned_allocator));
+
+      // Copy the chromatic coefficients B to a temporary Tensor on the CPU
+ auto chromatic_coeffs_b = chromatic_coeffs_b_t.tensor<float, 2>();
+ AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(coeffs_b, chromatic_coeffs_b);
+ chromatic_coeffs_b_data = chromatic_coeffs_b.data();
+ }
+
+ // Perform augmentation either on CPU or GPU
+ Augment<Device>(
+ ctx,
+ ctx->eigen_device<Device>(),
+ batch_size,
+ channels,
+ src_width,
+ src_height,
+ src_count,
+ out_width,
+ out_height,
+ input_b.data(),
+ output_b.data(),
+ spat_transform_b.data(),
+ chromatic_coeffs_b_data);
+
+ // FlowAugmentation needs the inverse
+ // TODO: To avoid rewriting, can we invert when we read on the
+ // FlowAugmentation side?
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+ out_width, out_height,
+ src_width, src_height,
+ spat_transform_b,
+ true);
+
+ /*** END AUGMENTATION TO IMAGE B ***/
+ }
+
+ private:
+ std::vector<int32>crop_;
+
+ // Params A
+ std::vector<string>params_a_name_;
+ std::vector<string>params_a_rand_type_;
+ std::vector<bool>params_a_exp_;
+ std::vector<float>params_a_mean_;
+ std::vector<float>params_a_spread_;
+ std::vector<float>params_a_prob_;
+ std::vector<float>params_a_coeff_schedule_;
+
+ // Params B
+ std::vector<string>params_b_name_;
+ std::vector<string>params_b_rand_type_;
+ std::vector<bool>params_b_exp_;
+ std::vector<float>params_b_mean_;
+ std::vector<float>params_b_spread_;
+ std::vector<float>params_b_prob_;
+ std::vector<float>params_b_coeff_schedule_;
+};
+
+
+REGISTER_KERNEL_BUILDER(Name("DataAugmentation")
+ .Device(DEVICE_CPU)
+ .HostMemory("global_step")
+ .HostMemory("transforms_from_a")
+ .HostMemory("transforms_from_b"),
+ DataAugmentation<CPUDevice>)
+
+#if GOOGLE_CUDA
+
+REGISTER_KERNEL_BUILDER(Name("DataAugmentation")
+ .Device(DEVICE_GPU)
+ .HostMemory("global_step")
+ .HostMemory("transforms_from_a")
+ .HostMemory("transforms_from_b"),
+ DataAugmentation<GPUDevice>)
+#endif // GOOGLE_CUDA
+} // namespace tensorflow
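params_*_coeff_schedule is read as [half_life, initial_coeff, final_coeff]: the spread of every random coefficient is scaled by a discount that ramps from initial_coeff towards final_coeff as a logistic in the global step, and the constant 1.0986 (roughly ln 3) puts the ramp exactly halfway at global_step == half_life. A short sketch of the curve (illustrative only):

    import numpy as np

    def discount_coeff(global_step, half_life, initial_coeff, final_coeff):
        """The schedule used by DataAugmentation: a logistic ramp between the two coefficients."""
        # ramp is 0 at step 0, 0.5 at step == half_life, and approaches 1 for large steps.
        ramp = 2.0 / (1.0 + np.exp(-1.0986 * global_step / half_life)) - 1.0
        return initial_coeff + (final_coeff - initial_coeff) * ramp

    # With schedule [50000, 0.5, 1.0] the discount is 0.5 at step 0,
    # 0.75 at step 50000 and close to 1.0 by step 200000.
    for step in [0, 50000, 200000]:
        print(step, round(discount_coeff(step, 50000.0, 0.5, 1.0), 3))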
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc
new file mode 100644
index 0000000..7a2101d
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc
@@ -0,0 +1,348 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "augmentation_base.h"
+#include "data_augmentation.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+inline __device__ __host__ float clamp(float f, float a, float b) {
+ return fmaxf(a, fminf(f, b));
+}
+
+__global__ void SpatialAugmentation(
+ const int32 nthreads,
+ const int src_width,
+ const int src_height,
+ const int channels,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ int c = index % channels;
+ int x = (index / channels) % out_width;
+ int y = (index / channels / out_width) % out_height;
+ int n = index / channels / out_width / out_height;
+
+ const float *transMat = transMats + n * 6;
+ float xpos = x * transMat[0] + y * transMat[1] + transMat[2];
+ float ypos = x * transMat[3] + y * transMat[4] + transMat[5];
+
+ xpos = clamp(xpos, 0.0f, (float)(src_width) - 1.05f);
+ ypos = clamp(ypos, 0.0f, (float)(src_height) - 1.05f);
+
+ float tlx = floor(xpos);
+ float tly = floor(ypos);
+
+ // Bilinear interpolation
+ int srcTLIdx = ((n * src_height + tly) * src_width + tlx) * channels + c;
+ int srcTRIdx = min((int)(((n * src_height + tly) * src_width + (tlx + 1)) * channels + c),
+ src_count);
+ int srcBLIdx = min((int)(((n * src_height + (tly + 1)) * src_width + tlx) * channels + c),
+ src_count);
+ int srcBRIdx = min((int)(((n * src_height + (tly + 1)) * src_width + (tlx + 1)) * channels + c),
+ src_count);
+
+ float xdist = xpos - tlx;
+ float ydist = ypos - tly;
+
+ float dest = (1 - xdist) * (1 - ydist) * src_data[srcTLIdx]
+ + (xdist) * (ydist) * src_data[srcBRIdx]
+ + (1 - xdist) * (ydist) * src_data[srcBLIdx]
+ + (xdist) * (1 - ydist) * src_data[srcTRIdx];
+
+ out_data[index] = dest;
+ }
+}
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template<>
+void Augment(OpKernelContext *context,
+ const GPUDevice& d,
+ const int batch_size,
+ const int channels,
+ const int src_width,
+ const int src_height,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats,
+ float *chromatic_coeffs) {
+ const int out_count = batch_size * out_height * out_width * channels;
+ CudaLaunchConfig config = GetCudaLaunchConfig(out_count, d);
+
+  if (chromatic_coeffs != NULL) {
+    printf("Chromatic transform not yet implemented on GPU, ignoring.\n");
+  }
+
+ SpatialAugmentation << < config.block_count, config.thread_per_block, 0, d.stream() >> > (
+ config.virtual_thread_count, src_width, src_height, channels, src_count,
+ out_width, out_height,
+ src_data, out_data, transMats);
+}
+
+//
+// template<typename Device>
+// class DataAugmentation : public OpKernel {
+// public:
+// explicit DataAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
+// // Get the crop [height, width] tensor and verify its dimensions
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
+// OP_REQUIRES(ctx, crop_.size() == 2,
+// errors::InvalidArgument("crop must be 2 dimensions"));
+//
+// // TODO: Verify params are all the same length
+//
+// // Get the tensors for params_a and verify their dimensions
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_name", &params_a_name_));
+// OP_REQUIRES_OK(ctx,
+// ctx->GetAttr("params_a_rand_type",
+// &params_a_rand_type_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_exp", &params_a_exp_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_mean", &params_a_mean_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_spread",
+// &params_a_spread_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_prob", &params_a_prob_));
+//
+// // Get the tensors for params_b and verify their dimensions
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_name", &params_b_name_));
+// OP_REQUIRES_OK(ctx,
+// ctx->GetAttr("params_b_rand_type",
+// &params_b_rand_type_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_exp", &params_b_exp_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_mean", &params_b_mean_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_spread",
+// &params_b_spread_));
+// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_prob", &params_b_prob_));
+// }
+//
+// void Compute(OpKernelContext *ctx) override {
+// const GPUDevice& device = ctx->eigen_gpu_device();
+//
+// // Get the input images
+// const Tensor& input_a_t = ctx->input(0);
+// const Tensor& input_b_t = ctx->input(1);
+//
+// // Dimension constants
+// const int batch_size = input_a_t.dim_size(0);
+// const int src_height = input_a_t.dim_size(1);
+// const int src_width = input_a_t.dim_size(2);
+// const int channels = input_a_t.dim_size(3);
+// const int src_count = batch_size * src_height * src_width * channels;
+// const int out_height = crop_[0];
+// const int out_width = crop_[1];
+// const int out_count = batch_size * out_height * out_width * channels;
+//
+// // Allocate the memory for the output images
+// Tensor *output_a_t;
+// Tensor *output_b_t;
+//
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(0, TensorShape({ batch_size,
+// crop_[0], crop_[1],
+// channels }),
+// &output_a_t));
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(1, TensorShape({ batch_size,
+// crop_[0], crop_[1],
+// channels }),
+// &output_b_t));
+//
+// // Allocate the memory for the output spatial transforms
+// Tensor *spat_transform_a_t;
+// Tensor *spat_transform_b_t;
+//
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(2, TensorShape({ batch_size, 6 }),
+// &spat_transform_a_t));
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_output(3, TensorShape({ batch_size, 6 }),
+// &spat_transform_b_t));
+//
+// // Allocate temporary pinned memory for the spatial transforms to be
+// used
+// // on the GPU
+// tensorflow::AllocatorAttributes pinned_allocator;
+// pinned_allocator.set_on_host(true);
+// pinned_allocator.set_gpu_compatible(true);
+//
+// Tensor spat_transform_a_pinned_t;
+// Tensor spat_transform_b_pinned_t;
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_temp(DataTypeToEnum<float>::value,
+// TensorShape({ batch_size, 6 }),
+// &spat_transform_a_pinned_t,
+// pinned_allocator));
+// OP_REQUIRES_OK(ctx,
+// ctx->allocate_temp(DataTypeToEnum<float>::value,
+// TensorShape({ batch_size, 6 }),
+// &spat_transform_b_pinned_t,
+// pinned_allocator));
+// auto spat_transform_a_pinned = spat_transform_a_pinned_t.tensor<float,
+// 2>();
+// auto spat_transform_b_pinned = spat_transform_b_pinned_t.tensor<float,
+// 2>();
+//
+// /*** BEGIN AUGMENTATION TO IMAGE A ***/
+// auto input_a = input_a_t.tensor<float, 4>();
+// auto output_a = output_a_t->tensor<float, 4>();
+//
+// // Load augmentation parameters for image A
+// AugmentationParams aug_a = AugmentationParams(out_height, out_width,
+// params_a_name_,
+// params_a_rand_type_,
+// params_a_exp_,
+// params_a_mean_,
+// params_a_spread_,
+// params_a_prob_);
+//
+// std::vector<AugmentationCoeff> coeffs_a;
+//
+// bool gen_spatial_transform = aug_a.should_do_spatial_transform();
+//
+// for (int n = 0; n < batch_size; n++) {
+// AugmentationCoeff coeff;
+//
+// if (gen_spatial_transform) {
+// AugmentationLayerBase::generate_valid_spatial_coeffs(aug_a, coeff,
+// src_width,
+// src_height,
+// out_width,
+// out_height);
+// }
+//
+// coeffs_a.push_back(coeff);
+// }
+//
+// // Copy spatial coefficients A to the output Tensor on the CPU (output
+// for
+// // FlowAugmentation)
+// auto spat_transform_a = spat_transform_a_t->tensor<float, 2>();
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+// spat_transform_a);
+//
+// // ...as well as a Tensor going to the GPU
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+//
+//
+//
+// spat_transform_a_pinned);
+//
+// CudaLaunchConfig config = GetCudaLaunchConfig(out_count, device);
+// SpatialAugmentation << < config.block_count, config.thread_per_block,
+// 0,
+// device.stream() >> > (
+// config.virtual_thread_count, src_width, src_height, channels,
+// src_count,
+// out_width, out_height,
+// input_a.data(), output_a.data(), spat_transform_a_pinned.data());
+//
+// /*** END AUGMENTATION TO IMAGE A ***/
+//
+// /*** BEGIN GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+// AugmentationParams aug_b = AugmentationParams(out_height, out_width,
+// params_b_name_,
+// params_b_rand_type_,
+// params_b_exp_,
+// params_b_mean_,
+// params_b_spread_,
+// params_b_prob_);
+//
+// std::vector<AugmentationCoeff> coeffs_b;
+//
+// gen_spatial_transform = aug_b.should_do_spatial_transform();
+//
+// for (int n = 0; n < batch_size; n++) {
+// AugmentationCoeff coeff;
+//
+// if (gen_spatial_transform) {
+// AugmentationLayerBase::generate_valid_spatial_coeffs(aug_b, coeff,
+// src_width,
+// src_height,
+// out_width,
+// out_height);
+// }
+//
+// coeffs_b.push_back(coeff);
+// }
+//
+// /*** END GENERATE NEW COEFFICIENTS FOR IMAGE B ***/
+//
+// /*** BEGIN AUGMENTATION TO IMAGE B ***/
+// auto input_b = input_b_t.tensor<float, 4>();
+// auto output_b = output_b_t->tensor<float, 4>();
+//
+// // Copy spatial coefficients B to the output Tensor on the CPU
+// auto spat_transform_b = spat_transform_b_t->tensor<float, 2>();
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+// spat_transform_b,
+// true);
+// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b,
+// out_width,
+// out_height,
+// src_width,
+// src_height,
+//
+//
+//
+// spat_transform_b_pinned);
+//
+// SpatialAugmentation << < config.block_count, config.thread_per_block,
+// 0,
+// device.stream() >> > (
+// config.virtual_thread_count, src_width, src_height, channels,
+// src_count,
+// out_width, out_height,
+// input_b.data(), output_b.data(), spat_transform_b_pinned.data());
+//
+// /*** END AUGMENTATION TO IMAGE B ***/
+// }
+//
+// private:
+// std::vector<int32>crop_;
+//
+// // Params A
+// std::vector<string>params_a_name_;
+// std::vector<string>params_a_rand_type_;
+// std::vector<bool>params_a_exp_;
+// std::vector<float>params_a_mean_;
+// std::vector<float>params_a_spread_;
+// std::vector<float>params_a_prob_;
+//
+// // Params B
+// std::vector<string>params_b_name_;
+// std::vector<string>params_b_rand_type_;
+// std::vector<bool>params_b_exp_;
+// std::vector<float>params_b_mean_;
+// std::vector<float>params_b_spread_;
+// std::vector<float>params_b_prob_;
+// };
+} // namespace tensorflow
+#endif // GOOGLE_CUDA
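Both the CPU path and the SpatialAugmentation CUDA kernel sample the source image with the same backward mapping: each output pixel (x, y) is pushed through the 2x3 transform to a source position, and the four neighbouring source pixels are blended bilinearly after clamping. A NumPy sketch of that inner step for a single pixel (array and function names are assumptions):

    import numpy as np

    def sample_bilinear(src, trans_mat, x, y):
        """src: (H, W, C) float image, trans_mat: flat [t0..t5], (x, y): output pixel."""
        h, w, _ = src.shape
        xpos = x * trans_mat[0] + y * trans_mat[1] + trans_mat[2]
        ypos = x * trans_mat[3] + y * trans_mat[4] + trans_mat[5]
        # Same clamp as the kernels: keeps the 2x2 neighbourhood inside the image.
        xpos = np.clip(xpos, 0.0, w - 1.05)
        ypos = np.clip(ypos, 0.0, h - 1.05)
        tlx, tly = int(np.floor(xpos)), int(np.floor(ypos))
        xdist, ydist = xpos - tlx, ypos - tly
        return ((1 - xdist) * (1 - ydist) * src[tly, tlx]
                + xdist * ydist * src[tly + 1, tlx + 1]
                + (1 - xdist) * ydist * src[tly + 1, tlx]
                + xdist * (1 - ydist) * src[tly, tlx + 1])

    img = np.arange(4 * 6 * 3, dtype=np.float32).reshape(4, 6, 3)
    identity = [1, 0, 0, 0, 1, 0]
    print(sample_bilinear(img, identity, 2, 1))  # equals img[1, 2] under the identity transform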
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h
new file mode 100644
index 0000000..545b8a0
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h
@@ -0,0 +1,22 @@
+#ifndef FLOWNET_DATA_AUGMENTATION_H_
+#define FLOWNET_DATA_AUGMENTATION_H_
+
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+template<class Device>
+void Augment(OpKernelContext *context,
+ const Device & d,
+ const int batch_size,
+ const int channels,
+ const int src_width,
+ const int src_height,
+ const int src_count,
+ const int out_width,
+ const int out_height,
+ const float *src_data,
+ float *out_data,
+ const float *transMats,
+ float *chromatic_coeffs);
+} // namespace tensorflow
+#endif // FLOWNET_DATA_AUGMENTATION_H_
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc
new file mode 100644
index 0000000..b5cc11f
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc
@@ -0,0 +1,129 @@
+#define EIGEN_USE_THREADS
+
+#include "flow_augmentation.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+inline int clamp(int f, int a, int b) {
+ return std::max(a, std::min(f, b));
+}
+
+template<>
+void FillFlowAugmentation(const CPUDevice& device,
+ typename TTypes<float, 4>::Tensor output,
+ typename TTypes<float, 4>::ConstTensor flows,
+ typename TTypes<float, 2>::ConstTensor transforms_from_a,
+ typename TTypes<float, 2>::ConstTensor transforms_from_b) {
+ const int batch_size = output.dimension(0);
+ const int out_height = output.dimension(1);
+ const int out_width = output.dimension(2);
+ const int src_height = flows.dimension(1);
+ const int src_width = flows.dimension(2);
+ const int src_total_count = flows.dimension(0) * flows.dimension(1) *
+ flows.dimension(2) * flows.dimension(3);
+ float *output_ptr = output.data();
+ const float *flow_ptr = flows.data();
+
+ for (int n = 0; n < batch_size; n++) {
+ const float *transMatA = transforms_from_a.data() + n * 6;
+ const float *transMatB = transforms_from_b.data() + n * 6;
+
+ for (int y = 0; y < out_height; y++) {
+ int outputIdxOffset = (n * out_height + y) * out_width;
+
+ for (int x = 0; x < out_width; x++) {
+ // Apply transformation matrix applied to first image
+ const float xpos1 = x * transMatA[0] + y * transMatA[1] + transMatA[2];
+ const float ypos1 = x * transMatA[3] + y * transMatA[4] + transMatA[5];
+
+ const int srcXIdx =
+ ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) * 2 + 0;
+ const int srcYIdx = srcXIdx + 1;
+
+ const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)];
+ const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)];
+
+ // Apply inverse of the transformation matrix applied to second image
+ const float xpos3 = xpos2 * transMatB[0] + ypos2 * transMatB[1] + transMatB[2];
+ const float ypos3 = xpos2 * transMatB[3] + ypos2 * transMatB[4] + transMatB[5];
+
+ output_ptr[(outputIdxOffset + x) * 2 + 0] = xpos3 - (float)x;
+ output_ptr[(outputIdxOffset + x) * 2 + 1] = ypos3 - (float)y;
+ }
+ }
+ }
+}
+
+template<typename Device>
+class FlowAugmentation : public OpKernel {
+ public:
+ explicit FlowAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) {
+ // Get the crop [height, width] tensor and verify its dimensions
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_));
+ OP_REQUIRES(ctx, crop_.size() == 2,
+ errors::InvalidArgument("crop must be 2 dimensions"));
+ }
+
+ void Compute(OpKernelContext *ctx) override {
+    // Get the input flows and transforms and verify their dimensions
+ const Tensor& flows_t = ctx->input(0);
+ const Tensor& transforms_from_a_t = ctx->input(1);
+ const Tensor& transforms_from_b_t = ctx->input(2);
+
+ OP_REQUIRES(ctx, flows_t.dims() == 4,
+                errors::InvalidArgument("Input flows must have rank 4"));
+ OP_REQUIRES(ctx,
+ (TensorShapeUtils::IsMatrix(transforms_from_a_t.shape()) &&
+ transforms_from_a_t.dim_size(0) ==
+ flows_t.dim_size(0) &&
+ transforms_from_a_t.dim_size(1) == 6),
+ errors::InvalidArgument(
+ "Input transforms_from_a should be num_images x 6"));
+ OP_REQUIRES(ctx,
+ (TensorShapeUtils::IsMatrix(transforms_from_b_t.shape()) &&
+ transforms_from_b_t.dim_size(0) ==
+ flows_t.dim_size(0) &&
+ transforms_from_b_t.dim_size(1) == 6),
+ errors::InvalidArgument(
+ "Input transforms_from_b should be num_images x 6"));
+
+ // Allocate the memory for the output
+ Tensor *output_t;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(
+ 0,
+ TensorShape({ flows_t.dim_size(0), crop_[0], crop_[1],
+ flows_t.dim_size(3) }), &output_t));
+
+ // Perform flow augmentation
+ auto flows = flows_t.tensor<float, 4>();
+ auto transforms_from_a = transforms_from_a_t.tensor<float, 2>();
+ auto transforms_from_b = transforms_from_b_t.tensor<float, 2>();
+ auto output = output_t->tensor<float, 4>();
+
+ FillFlowAugmentation(ctx->eigen_device<Device>(),
+ output,
+ flows,
+ transforms_from_a,
+ transforms_from_b);
+ }
+
+ private:
+ std::vector<int32>crop_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("FlowAugmentation")
+ .Device(DEVICE_CPU),
+ FlowAugmentation<CPUDevice>)
+
+#if GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(Name("FlowAugmentation")
+ .Device(DEVICE_GPU),
+ FlowAugmentation<GPUDevice>)
+#endif // GOOGLE_CUDA
+} // end namespace tensorflow
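FillFlowAugmentation rewrites a ground-truth flow so it stays consistent with two independently augmented crops: an output pixel is mapped into original image A by transforms_from_a, the stored flow at that (rounded) source position carries it into original image B, and transforms_from_b, which DataAugmentation has already inverted, brings it back into the cropped B frame; the new flow is the offset from the output pixel. A NumPy sketch for a single pixel (names are illustrative; the kernel clamps a flat index rather than each coordinate):

    import numpy as np

    def apply_mat(m, x, y):
        """Apply a flat 2x3 transform [t0..t5] to a point (x, y)."""
        return (x * m[0] + y * m[1] + m[2],
                x * m[3] + y * m[4] + m[5])

    def augmented_flow_at(flow, trans_a, inv_trans_b, x, y):
        """flow: (H, W, 2) original flow; trans_a / inv_trans_b: flat [t0..t5] per image."""
        h, w, _ = flow.shape
        x1, y1 = apply_mat(trans_a, x, y)                    # crop A -> original A
        sx = int(np.clip(round(x1), 0, w - 1))               # nearest source pixel
        sy = int(np.clip(round(y1), 0, h - 1))
        x2 = x1 + flow[sy, sx, 0]                            # follow the original flow into B
        y2 = y1 + flow[sy, sx, 1]
        x3, y3 = apply_mat(inv_trans_b, x2, y2)              # original B -> crop B
        return x3 - x, y3 - y                                # flow expressed between the crops

    flow = np.zeros((8, 10, 2), np.float32)
    flow[..., 0] = 1.0                                       # constant one-pixel shift to the right
    identity = [1, 0, 0, 0, 1, 0]
    print(augmented_flow_at(flow, identity, identity, 4, 3))  # (1.0, 0.0): unchanged under identity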
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h
new file mode 100644
index 0000000..7795991
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h
@@ -0,0 +1,19 @@
+#ifndef FLOWNET_FLOW_AUG_H_
+#define FLOWNET_FLOW_AUG_H_
+
+// See docs in ../ops/image_ops.cc.
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+template<class Device>
+void FillFlowAugmentation(const Device& device,
+ typename TTypes<float, 4>::Tensor output,
+ typename TTypes<float, 4>::ConstTensor flows,
+ typename TTypes<float, 2>::ConstTensor transforms_from_a,
+ typename TTypes<float, 2>::ConstTensor transforms_from_b);
+} // end namespace tensorflow
+
+#endif // FLOWNET_FLOW_AUG_H_
diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc
new file mode 100644
index 0000000..7e10864
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc
@@ -0,0 +1,95 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+#include <iostream>
+
+#include "flow_augmentation.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+typedef Eigen::GpuDevice GPUDevice;
+
+inline __device__ __host__ int clamp(int f, int a, int b) {
+ return max(a, min(f, b));
+}
+
+__global__ void FillFlowAugmentationKernel(
+ const int32 nthreads,
+ const float *flow_ptr,
+ const float *transforms_from_a,
+ const float *inv_transforms_from_b,
+ const int src_total_count, const int src_height, const int src_width,
+ const int batch_size, const int out_height,
+ const int out_width, float *output_ptr) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ const float x = (float)(index % out_width);
+ const float y = (float)((index / out_width) % out_height);
+ const int n = (index / out_width / out_height);
+
+ const int transformIdx = n * 6;
+
+    // Apply transformation matrix applied to first image
+ const float xpos1 = x * transforms_from_a[transformIdx + 0]
+ + y * transforms_from_a[transformIdx + 1]
+ + transforms_from_a[transformIdx + 2];
+ const float ypos1 = x * transforms_from_a[transformIdx + 3]
+ + y * transforms_from_a[transformIdx + 4]
+ + transforms_from_a[transformIdx + 5];
+
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+ const int srcXIdx =
+ ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) *
+ 2 + 0;
+ const int srcYIdx = srcXIdx + 1;
+
+ const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)];
+ const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)];
+
+    // Apply inverse of the transformation matrix applied to second image
+ const float xpos3 = xpos2 * inv_transforms_from_b[transformIdx + 0]
+ + ypos2 * inv_transforms_from_b[transformIdx + 1]
+ + inv_transforms_from_b[transformIdx + 2];
+ const float ypos3 = xpos2 * inv_transforms_from_b[transformIdx + 3]
+ + ypos2 * inv_transforms_from_b[transformIdx + 4]
+ + inv_transforms_from_b[transformIdx + 5];
+
+ output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 0] = xpos3 -
+ x;
+ output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 1] = ypos3 -
+ y;
+ }
+}
+
+template<>
+void FillFlowAugmentation(const GPUDevice& device,
+ typename TTypes<float, 4>::Tensor output,
+ typename TTypes<float, 4>::ConstTensor flows,
+ typename TTypes<const float, 2>::ConstTensor transforms_from_a,
+ typename TTypes<const float, 2>::ConstTensor transforms_from_b) {
+ const int batch_size = output.dimension(0);
+ const int out_height = output.dimension(1);
+ const int out_width = output.dimension(2);
+ const int depth = 2;
+ const int total_count = batch_size * out_height * out_width * depth;
+ const int src_total_count = flows.dimension(0) * flows.dimension(1) *
+ flows.dimension(2) * flows.dimension(3);
+
+ CudaLaunchConfig config = GetCudaLaunchConfig(total_count / 2, device);
+
+ FillFlowAugmentationKernel << < config.block_count, config.thread_per_block, 0,
+ device.stream() >> > (
+ total_count / 2, flows.data(), transforms_from_a.data(),
+ transforms_from_b.data(),
+ src_total_count, flows.dimension(1), flows.dimension(2), batch_size,
+ out_height, out_width, output.data());
+}
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA
diff --git a/Codes/flownet2/src/ops/preprocessing/preprocessing.cc b/Codes/flownet2/src/ops/preprocessing/preprocessing.cc
new file mode 100644
index 0000000..086a0d0
--- /dev/null
+++ b/Codes/flownet2/src/ops/preprocessing/preprocessing.cc
@@ -0,0 +1,96 @@
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+using shape_inference::DimensionHandle;
+
+Status SetOutputToSizedImage(InferenceContext *c) {
+ ShapeHandle input;
+
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+ DimensionHandle batch = c->Dim(input, 0);
+ DimensionHandle depth = c->Dim(input, 3);
+ std::vector<int32> crop_;
+ c->GetAttr("crop", &crop_);
+ DimensionHandle height = c->MakeDim(crop_[0]);
+ DimensionHandle width = c->MakeDim(crop_[1]);
+ c->set_output(0, c->MakeShape({ batch, height, width, depth }));
+ return Status::OK();
+}
+
+REGISTER_OP("DataAugmentation")
+.Input("image_a: float32")
+.Input("image_b: float32")
+.Input("global_step: int64")
+.Attr("crop: list(int) >= 2")
+.Attr("params_a_name: list(string)")
+.Attr("params_a_rand_type: list(string)")
+.Attr("params_a_exp: list(bool)")
+.Attr("params_a_mean: list(float)")
+.Attr("params_a_spread: list(float)")
+.Attr("params_a_prob: list(float)")
+.Attr("params_a_coeff_schedule: list(float)")
+.Attr("params_b_name: list(string)")
+.Attr("params_b_rand_type: list(string)")
+.Attr("params_b_exp: list(bool)")
+.Attr("params_b_mean: list(float)")
+.Attr("params_b_spread: list(float)")
+.Attr("params_b_prob: list(float)")
+.Attr("params_b_coeff_schedule: list(float)")
+.Output("aug_image_a: float32")
+.Output("aug_image_b: float32")
+.Output("transforms_from_a: float32")
+.Output("transforms_from_b: float32")
+.SetShapeFn([](InferenceContext *c) {
+ // Verify input A and input B both have 4 dimensions
+ ShapeHandle input_shape_a, input_shape_b;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape_a));
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_shape_b));
+
+ // TODO: Verify params vectors all have the same length
+
+ // TODO: Move this out of here and into Compute
+ // Verify input A and input B are the same shape
+ DimensionHandle batch_size, unused;
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 0),
+ c->Value(c->Dim(input_shape_b, 0)),
+ &batch_size));
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 1),
+ c->Value(c->Dim(input_shape_b, 1)), &unused));
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 2),
+ c->Value(c->Dim(input_shape_b, 2)), &unused));
+ TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 3),
+ c->Value(c->Dim(input_shape_b, 3)), &unused));
+
+ // Get cropping dimensions
+ std::vector<int32>crop_;
+ TF_RETURN_IF_ERROR(c->GetAttr("crop", &crop_));
+
+ // Reshape input shape to cropped shape
+ TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 1, c->MakeDim(crop_[0]),
+ &input_shape_a));
+ TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 2, c->MakeDim(crop_[1]),
+ &input_shape_a));
+
+ // Set output images shapes
+ c->set_output(0, input_shape_a);
+ c->set_output(1, input_shape_a);
+
+ // Set output spatial transforms shapes
+ c->set_output(2, c->MakeShape({ batch_size, 6 }));
+ c->set_output(3, c->MakeShape({ batch_size, 6 }));
+
+ return Status::OK();
+ });
+
+REGISTER_OP("FlowAugmentation")
+.Input("flows: float32")
+.Input("transforms_from_a: float32")
+.Input("transforms_from_b: float32")
+.Attr("crop: list(int) >= 2")
+.Output("transformed_flows: float32")
+.SetShapeFn(SetOutputToSizedImage);
+} // namespace tensorflow
diff --git a/Codes/flownet2/src/training_schedules.py b/Codes/flownet2/src/training_schedules.py
new file mode 100644
index 0000000..4db5aab
--- /dev/null
+++ b/Codes/flownet2/src/training_schedules.py
@@ -0,0 +1,12 @@
+LONG_SCHEDULE = {
+ 'step_values': [400000, 600000, 800000, 1000000],
+ 'learning_rates': [0.0001, 0.00005, 0.000025, 0.0000125, 0.00000625],
+ 'momentum': 0.9,
+ 'momentum2': 0.999,
+ 'weight_decay': 0.0004,
+ 'max_iter': 1200000,
+}
+
+FINETUNE_SCHEDULE = {
+ # TODO: Finetune schedule
+}
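LONG_SCHEDULE is the FlowNet2 long training schedule: a piecewise-constant learning rate that starts at 1e-4 and is halved at each of the listed step boundaries (learning_rates has one more entry than step_values), with Adam-style momenta 0.9 / 0.999. A sketch of wiring it into a TF1 optimizer; how the training scripts in this commit actually consume the dict is an assumption here:

    import tensorflow as tf

    from src.training_schedules import LONG_SCHEDULE

    global_step = tf.train.get_or_create_global_step()

    # One learning-rate value per interval between (and beyond) the boundaries.
    learning_rate = tf.train.piecewise_constant(
        global_step,
        boundaries=[tf.cast(v, tf.int64) for v in LONG_SCHEDULE['step_values']],
        values=LONG_SCHEDULE['learning_rates'])

    optimizer = tf.train.AdamOptimizer(learning_rate,
                                       beta1=LONG_SCHEDULE['momentum'],
                                       beta2=LONG_SCHEDULE['momentum2'])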
diff --git a/Codes/flownet2/src/utils.py b/Codes/flownet2/src/utils.py
new file mode 100644
index 0000000..f6abe18
--- /dev/null
+++ b/Codes/flownet2/src/utils.py
@@ -0,0 +1,46 @@
+import tensorflow as tf
+
+
+# Thanks, https://github.com/tensorflow/tensorflow/issues/4079
+def LeakyReLU(x, leak=0.1, name="lrelu"):
+ with tf.variable_scope(name):
+ f1 = 0.5 * (1.0 + leak)
+ f2 = 0.5 * (1.0 - leak)
+ return f1 * x + f2 * abs(x)
+
+
+def average_endpoint_error(labels, predictions):
+ """
+    Given labels and predictions of size (N, H, W, 2), calculates the average endpoint error:
+        sqrt[sum_across_channels{(X - Y)^2}], summed over all pixels and divided by the batch size N
+ """
+ num_samples = predictions.shape.as_list()[0]
+ with tf.name_scope(None, "average_endpoint_error", (predictions, labels)) as scope:
+ predictions = tf.to_float(predictions)
+ labels = tf.to_float(labels)
+ predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+
+ squared_difference = tf.square(tf.subtract(predictions, labels))
+ # sum across channels: sum[(X - Y)^2] -> N, H, W, 1
+ loss = tf.reduce_sum(squared_difference, 3, keep_dims=True)
+ loss = tf.sqrt(loss)
+ return tf.reduce_sum(loss) / num_samples
+
+
+def pad(tensor, num=1):
+ """
+ Pads the given tensor along the height and width dimensions with `num` 0s on each side
+ """
+ return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT")
+
+
+def antipad(tensor, num=1):
+ """
+    Performs a crop. "Padding" for a deconvolutional layer (conv2d transpose) removes
+ padding from the output rather than adding it to the input.
+ """
+ batch, h, w, c = tensor.get_shape().as_list()
+ # print(batch, h, w, c)
+ # print(type(batch), type(h), type(w), type(c))
+ # return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c])
+ return tensor[:, num: num + h - 2 * num, num: num + w - 2 * num, :]
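LeakyReLU is expressed as f1*x + f2*|x| with f1 = (1+leak)/2 and f2 = (1-leak)/2, which is algebraically identical to max(x, leak*x), and antipad is the inverse of pad: it crops num pixels from each spatial border of an NHWC tensor. A quick NumPy check of both identities (illustrative only):

    import numpy as np

    leak = 0.1
    x = np.linspace(-3, 3, 7)
    f1, f2 = 0.5 * (1.0 + leak), 0.5 * (1.0 - leak)

    # Slope 1 for x >= 0 and slope `leak` for x < 0 in both formulations.
    print(np.allclose(f1 * x + f2 * np.abs(x), np.maximum(x, leak * x)))  # True

    # Padding then anti-padding by the same amount restores the tensor.
    t = np.random.rand(2, 5, 6, 3).astype(np.float32)
    padded = np.pad(t, [(0, 0), (1, 1), (1, 1), (0, 0)], mode='constant')
    print(np.array_equal(padded[:, 1:-1, 1:-1, :], t))  # True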
diff --git a/Codes/flownet2/test.py b/Codes/flownet2/test.py
new file mode 100644
index 0000000..2fcb380
--- /dev/null
+++ b/Codes/flownet2/test.py
@@ -0,0 +1,163 @@
+import os
+import tensorflow as tf
+import numpy as np
+from scipy.misc import imread
+import matplotlib
+from src.flowlib import read_flow, flow_to_image
+matplotlib.use('TKAgg')
+import matplotlib.pyplot as plt
+
+_preprocessing_ops = tf.load_op_library(
+ tf.resource_loader.get_path_to_datafile("./src/ops/build/preprocessing.so"))
+
+
+def display(img, c):
+ plt.subplot(int('22' + str(c + 1)))
+ plt.imshow(img[0, :, :, :])
+
+
+def main():
+ """
+.Input("image_a: float32")
+.Input("image_b: float32")
+.Attr("crop: list(int) >= 2")
+.Attr("params_a_name: list(string)")
+.Attr("params_a_rand_type: list(string)")
+.Attr("params_a_exp: list(bool)")
+.Attr("params_a_mean: list(float32)")
+.Attr("params_a_spread: list(float32)")
+.Attr("params_a_prob: list(float32)")
+.Attr("params_b_name: list(string)")
+.Attr("params_b_rand_type: list(string)")
+.Attr("params_b_exp: list(bool)")
+.Attr("params_b_mean: list(float32)")
+.Attr("params_b_spread: list(float32)")
+.Attr("params_b_prob: list(float32)")
+.Output("aug_image_a: float32")
+.Output("aug_image_b: float32")
+.Output("spatial_transform_a: float32")
+.Output("inv_spatial_transform_b: float32")
+ """
+
+ crop = [364, 492]
+ params_a_name = ['translate_x', 'translate_y']
+ params_a_rand_type = ['uniform_bernoulli', 'uniform_bernoulli']
+ params_a_exp = [False, False]
+ params_a_mean = [0.0, 0.0]
+ params_a_spread = [0.4, 0.4]
+ params_a_prob = [1.0, 1.0]
+ params_b_name = []
+ params_b_rand_type = []
+ params_b_exp = []
+ params_b_mean = []
+ params_b_spread = []
+ params_b_prob = []
+
+ with tf.Session() as sess:
+ with tf.device('/gpu:0'):
+ image_a = imread('./img0.ppm') / 255.0
+ image_b = imread('./img1.ppm') / 255.0
+ flow = read_flow('./flow.flo')
+
+ image_a_tf = tf.expand_dims(tf.to_float(tf.constant(image_a, dtype=tf.float64)), 0)
+ image_b_tf = tf.expand_dims(tf.to_float(tf.constant(image_b, dtype=tf.float64)), 0)
+
+ preprocess = _preprocessing_ops.data_augmentation(image_a_tf,
+ image_b_tf,
+ crop,
+ params_a_name,
+ params_a_rand_type,
+ params_a_exp,
+ params_a_mean,
+ params_a_spread,
+ params_a_prob,
+ params_b_name,
+ params_b_rand_type,
+ params_b_exp,
+ params_b_mean,
+ params_b_spread,
+ params_b_prob)
+
+ out = sess.run(preprocess)
+ trans = out.spatial_transform_a
+ inv_trans = out.inv_spatial_transform_b
+
+ print(trans.shape)
+ print(inv_trans.shape)
+
+ flow_tf = tf.expand_dims(tf.to_float(tf.constant(flow)), 0)
+ aug_flow_tf = _preprocessing_ops.flow_augmentation(flow_tf, trans, inv_trans, crop)
+
+ aug_flow = sess.run(aug_flow_tf)[0, :, :, :]
+
+ # Plot img0, img0aug
+ plt.subplot(321)
+ plt.imshow(image_a)
+ plt.subplot(322)
+ plt.imshow(out.aug_image_a[0, :, :, :])
+
+ # Plot img1, img1aug
+ plt.subplot(323)
+ plt.imshow(image_b)
+ plt.subplot(324)
+ plt.imshow(out.aug_image_b[0, :, :, :])
+
+ # Plot flow, flowaug
+ plt.subplot(325)
+ plt.imshow(flow_to_image(flow))
+ plt.subplot(326)
+ plt.imshow(flow_to_image(aug_flow))
+
+ plt.show()
+
+ # image_b_aug = sess.run(image_b_tf)
+ #
+ # display(np.expand_dims(image_a, 0), 0)
+ # display(np.expand_dims(image_b, 0), 1)
+ # display(image_a_aug, 2)
+ # display(image_b_aug, 3)
+ # plt.show()
+
+ # o = _preprocessing_ops.flow_augmentation(flow, trans, inv_t, [4, 8])
+ # print n[:, :, :]
+ # print n[0, 0, 1], n[0, 0, 0]
+ # print n[1, 0, 1], n[1, 0, 0]
+ # print n[2, 0, 1], n[2, 0, 0]
+ # print '---'
+ # print sess.run(o)
+
+ """# Goes along width first!!
+ // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w)
+ // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k)
+
+ H=5, W=10, K=2
+ n=0, h=1, w=5, k=0
+
+ (2 * 10) + c
+
+ 30 49 n[0, 1, 5, 0]"""
+
+
+print(os.getpid())
+input("Press Enter to continue...")
+main()
+
+# Last index is channel!!
+
+# K
+
+# value 13 should be at [0, 2, 7, 1] aka batch=0, height=1, width=0, channel=0. it is at index=20.
+#
+# items = {
+# 'N': [0, 0],
+# 'H': [5, 2],
+# 'W': [10, 7],
+# 'K': [2, 1],
+# }
+#
+# for (i1, v1) in items.iteritems():
+# for (i2, v2) in items.iteritems():
+# for (i3, v3) in items.iteritems():
+# for (i4, v4) in items.iteritems():
+# if ((v1[1] * v2[0] + v2[1]) * v3[0] + v3[1]) * v4[0] + v4[1] == 55:
+# print 'found it: ', i1, i2, i3, i4
diff --git a/Codes/inference.py b/Codes/inference.py
new file mode 100644
index 0000000..0263339
--- /dev/null
+++ b/Codes/inference.py
@@ -0,0 +1,149 @@
+import tensorflow as tf
+import os
+import time
+import numpy as np
+import pickle
+
+
+from models import generator
+from utils import DataLoader, load, save, psnr_error
+from constant import const
+import evaluate
+
+
+slim = tf.contrib.slim
+
+os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
+os.environ['CUDA_VISIBLE_DEVICES'] = const.GPU
+
+dataset_name = const.DATASET
+test_folder = const.TEST_FOLDER
+
+num_his = const.NUM_HIS
+height, width = 256, 256
+
+snapshot_dir = const.SNAPSHOT_DIR
+psnr_dir = const.PSNR_DIR
+evaluate_name = const.EVALUATE
+
+print(const)
+
+
+# define dataset
+with tf.name_scope('dataset'):
+ test_video_clips_tensor = tf.placeholder(shape=[1, height, width, 3 * (num_his + 1)],
+ dtype=tf.float32)
+ test_inputs = test_video_clips_tensor[..., 0:num_his*3]
+ test_gt = test_video_clips_tensor[..., -3:]
+ print('test inputs = {}'.format(test_inputs))
+ print('test prediction gt = {}'.format(test_gt))
+
+# define the testing generator function;
+# in testing only the generator network is used, there is no discriminator network or flownet.
+with tf.variable_scope('generator', reuse=None):
+ print('testing = {}'.format(tf.get_variable_scope().name))
+ test_outputs = generator(test_inputs, layers=4, output_channel=3)
+ test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)
+
+
+config = tf.ConfigProto()
+config.gpu_options.allow_growth = True
+with tf.Session(config=config) as sess:
+ # dataset
+ data_loader = DataLoader(test_folder, height, width)
+
+ # initialize weights
+ sess.run(tf.global_variables_initializer())
+ print('Initialized global variables successfully!')
+
+ # tf saver
+ saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
+
+ restore_var = [v for v in tf.global_variables()]
+ loader = tf.train.Saver(var_list=restore_var)
+
+ def inference_func(ckpt, dataset_name, evaluate_name):
+ load(loader, sess, ckpt)
+
+ psnr_records = []
+ videos_info = data_loader.videos
+ num_videos = len(videos_info.keys())
+ total = 0
+ timestamp = time.time()
+
+ for video_name, video in videos_info.items():
+ length = video['length']
+ total += length
+ psnrs = np.empty(shape=(length,), dtype=np.float32)
+
+ for i in range(num_his, length):
+ video_clip = data_loader.get_video_clips(video_name, i - num_his, i + 1)
+ psnr = sess.run(test_psnr_error,
+ feed_dict={test_video_clips_tensor: video_clip[np.newaxis, ...]})
+ psnrs[i] = psnr
+
+ print('video = {} / {}, i = {} / {}, psnr = {:.6f}'.format(
+ video_name, num_videos, i, length, psnr))
+
+ psnrs[0:num_his] = psnrs[num_his]
+ psnr_records.append(psnrs)
+
+ result_dict = {'dataset': dataset_name, 'psnr': psnr_records, 'flow': [], 'names': [], 'diff_mask': []}
+
+ used_time = time.time() - timestamp
+ print('total time = {}, fps = {}'.format(used_time, total / used_time))
+
+ # TODO: determine the actual name of the ckpt.
+ pickle_path = os.path.join(psnr_dir, os.path.split(ckpt)[-1])
+ with open(pickle_path, 'wb') as writer:
+ pickle.dump(result_dict, writer, pickle.HIGHEST_PROTOCOL)
+
+ results = evaluate.evaluate(evaluate_name, pickle_path)
+ print(results)
+
+
+ if os.path.isdir(snapshot_dir):
+ def check_ckpt_valid(ckpt_name):
+ is_valid = False
+ ckpt = ''
+ if ckpt_name.startswith('model.ckpt-'):
+ ckpt_name_splits = ckpt_name.split('.')
+ ckpt = str(ckpt_name_splits[0]) + '.' + str(ckpt_name_splits[1])
+ ckpt_path = os.path.join(snapshot_dir, ckpt)
+ if os.path.exists(ckpt_path + '.index') and os.path.exists(ckpt_path + '.meta') and \
+ os.path.exists(ckpt_path + '.data-00000-of-00001'):
+ is_valid = True
+
+ return is_valid, ckpt
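+ # e.g. a file named 'model.ckpt-80000.index' yields ckpt = 'model.ckpt-80000',
+ # which is considered valid once the matching .index, .meta and
+ # .data-00000-of-00001 files all exist.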
+
+ def scan_psnr_folder():
+ tested_ckpt_in_psnr_sets = set()
+ for test_psnr in os.listdir(psnr_dir):
+ tested_ckpt_in_psnr_sets.add(test_psnr)
+ return tested_ckpt_in_psnr_sets
+
+ def scan_model_folder():
+ saved_models = set()
+ for ckpt_name in os.listdir(snapshot_dir):
+ is_valid, ckpt = check_ckpt_valid(ckpt_name)
+ if is_valid:
+ saved_models.add(ckpt)
+ return saved_models
+
+ tested_ckpt_sets = scan_psnr_folder()
+ while True:
+ all_model_ckpts = scan_model_folder()
+ new_model_ckpts = all_model_ckpts - tested_ckpt_sets
+
+ for ckpt_name in new_model_ckpts:
+ # inference
+ ckpt = os.path.join(snapshot_dir, ckpt_name)
+ inference_func(ckpt, dataset_name, evaluate_name)
+
+ tested_ckpt_sets.add(ckpt_name)
+
+ print('waiting for models...')
+ evaluate.evaluate('compute_auc', psnr_dir)
+ time.sleep(60)
+ else:
+ inference_func(snapshot_dir, dataset_name, evaluate_name)
diff --git a/Codes/loss_functions.py b/Codes/loss_functions.py
new file mode 100644
index 0000000..ca97966
--- /dev/null
+++ b/Codes/loss_functions.py
@@ -0,0 +1,54 @@
+import tensorflow as tf
+import numpy as np
+
+
+def flow_loss(gen_flows, gt_flows):
+ print(gen_flows['flow'])
+ return tf.reduce_mean(tf.abs(gen_flows['flow'] - gt_flows['flow']))
+
+
+def intensity_loss(gen_frames, gt_frames, l_num):
+ """
+ Calculates the mean lp loss between the predicted and ground truth frames.
+
+ @param gen_frames: The predicted frames at each scale.
+ @param gt_frames: The ground truth frames at each scale.
+ @param l_num: 1 or 2 for l1 and l2 loss, respectively.
+
+ @return: The lp loss.
+ """
+ return tf.reduce_mean(tf.abs((gen_frames - gt_frames) ** l_num))
+
+
+def gradient_loss(gen_frames, gt_frames, alpha):
+ """
+ Calculates the mean GDL (gradient difference) loss between the predicted and ground truth frames.
+
+ @param gen_frames: The predicted frames at each scale.
+ @param gt_frames: The ground truth frames at each scale.
+ @param alpha: The power to which each gradient term is raised.
+
+ @return: The GDL loss.
+ """
+ # calculate the loss for each scale
+ # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
+
+ channels = gen_frames.get_shape().as_list()[-1]
+ pos = tf.constant(np.identity(channels), dtype=tf.float32) # 3 x 3
+ neg = -1 * pos
+ filter_x = tf.expand_dims(tf.stack([neg, pos]), 0) # [-1, 1]
+ filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)]) # [[1],[-1]]
+ strides = [1, 1, 1, 1] # stride of (1, 1)
+ padding = 'SAME'
+
+ gen_dx = tf.abs(tf.nn.conv2d(gen_frames, filter_x, strides, padding=padding))
+ gen_dy = tf.abs(tf.nn.conv2d(gen_frames, filter_y, strides, padding=padding))
+ gt_dx = tf.abs(tf.nn.conv2d(gt_frames, filter_x, strides, padding=padding))
+ gt_dy = tf.abs(tf.nn.conv2d(gt_frames, filter_y, strides, padding=padding))
+
+ grad_diff_x = tf.abs(gt_dx - gen_dx)
+ grad_diff_y = tf.abs(gt_dy - gen_dy)
+
+ # condense into one tensor and avg
+ return tf.reduce_mean(grad_diff_x ** alpha + grad_diff_y ** alpha)
+
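+# A minimal usage sketch (hypothetical tensors, not part of the training graph):
+#   gen = tf.random_normal([4, 256, 256, 3])
+#   gt = tf.random_normal([4, 256, 256, 3])
+#   lp = intensity_loss(gen, gt, l_num=2)    # mean squared intensity difference
+#   gdl = gradient_loss(gen, gt, alpha=1)    # mean gradient difference loss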
diff --git a/Codes/models.py b/Codes/models.py
new file mode 100644
index 0000000..8c20134
--- /dev/null
+++ b/Codes/models.py
@@ -0,0 +1,44 @@
+import tensorflow as tf
+
+import unet
+import pix2pix
+
+from flownet2.src.flowlib import flow_to_image
+from flownet2.src.flownet_sd.flownet_sd import FlowNetSD # Ok
+from flownet2.src.training_schedules import LONG_SCHEDULE
+from flownet2.src.net import Mode
+
+
+slim = tf.contrib.slim
+
+
+def generator(inputs, layers, features_root=64, filter_size=3, pool_size=2, output_channel=3):
+ return unet.unet(inputs, layers, features_root, filter_size, pool_size, output_channel)
+
+
+def discriminator(inputs, num_filters=(128, 256, 512, 512)):
+ logits, end_points = pix2pix.pix2pix_discriminator(inputs, num_filters)
+ return logits, end_points['predictions']
+
+
+def flownet(input_a, input_b, height, width, reuse=None):
+ net = FlowNetSD(mode=Mode.TEST)
+ # train preds flow
+ input_a = (input_a + 1.0) / 2.0 # flownet expects images with pixel values in [0, 1]
+ input_b = (input_b + 1.0) / 2.0 # flownet expects images with pixel values in [0, 1]
+ # the expected input size is 384 x 512
+ input_a = tf.image.resize_images(input_a, [height, width])
+ input_b = tf.image.resize_images(input_b, [height, width])
+ flows = net.model(
+ inputs={'input_a': input_a, 'input_b': input_b},
+ training_schedule=LONG_SCHEDULE,
+ trainable=False, reuse=reuse
+ )
+ return flows['flow']
+
+
+def initialize_flownet(sess, checkpoint):
+ flownet_vars = slim.get_variables_to_restore(include=['FlowNetSD'])
+ flownet_saver = tf.train.Saver(flownet_vars)
+ print('Restoring FlowNetSD from {}!'.format(checkpoint))
+ flownet_saver.restore(sess, checkpoint)
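+
+# A minimal usage sketch (variable names are illustrative; see train.py for the
+# actual call sites):
+#   flow = flownet(input_a=last_input_frame, input_b=gt_frame,
+#                  height=flow_height, width=flow_width, reuse=None)
+#   with tf.Session() as sess:
+#       initialize_flownet(sess, const.FLOWNET_CHECKPOINT)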
diff --git a/Codes/models/download_pretrains.sh b/Codes/models/download_pretrains.sh
new file mode 100644
index 0000000..08e58ec
--- /dev/null
+++ b/Codes/models/download_pretrains.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+echo "Downloading trained models on ped1, ped2 and avenue datasets ....."
+
+wget "https://ofhz9a.bn.files.1drv.com/y4mHfGdUxGoa7NnnI-eIlTqInymvmHyDOSGGw5zKM08jOGukHKdYdxmtZiEEh-rCAWK7oTDTstQ5bKazvjdyTtsIUW7zxcKnVgIsgZg6DpEb-Qdq83Zmnnw6nv7pX5HhiOkMxc42CLl65QK0A2Mv1Cmj-062Pyodm-Mt5r24Id3_glS0NT6BdvAp7-VbevkXygnmXQrcXRQU6d0y1cHlZJ2ig/pretrains.tar.gz"
+tar -xvf pretrains.tar.gz
+rm pretrains.tar.gz
+
+echo "Download pretrains successfully..."
+
+
diff --git a/Codes/pix2pix.py b/Codes/pix2pix.py
new file mode 100644
index 0000000..941c8fc
--- /dev/null
+++ b/Codes/pix2pix.py
@@ -0,0 +1,274 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Implementation of the Image-to-Image Translation model.
+This network represents a port of the following work:
+ Image-to-Image Translation with Conditional Adversarial Networks
+ Phillip Isola, Jun-Yan Zhu, Tinghui Zhou and Alexei A. Efros
+ Arxiv, 2017
+ https://phillipi.github.io/pix2pix/
+A reference implementation written in Lua can be found at:
+https://github.com/phillipi/pix2pix/blob/master/models.lua
+"""
+import collections
+import functools
+
+import tensorflow as tf
+
+layers = tf.contrib.layers
+
+
+def pix2pix_arg_scope():
+ """Returns a default argument scope for isola_net.
+ Returns:
+ An arg scope.
+ """
+ # These parameters come from the online port, which don't necessarily match
+ # those in the paper.
+ # TODO(nsilberman): confirm these values with Philip.
+ instance_norm_params = {
+ 'center': True,
+ 'scale': True,
+ 'epsilon': 0.00001,
+ }
+
+ with tf.contrib.framework.arg_scope(
+ [layers.conv2d, layers.conv2d_transpose],
+ normalizer_fn=layers.instance_norm,
+ normalizer_params=instance_norm_params,
+ weights_initializer=tf.random_normal_initializer(0, 0.02)) as sc:
+ return sc
+
+
+def upsample(net, num_outputs, kernel_size, method='nn_upsample_conv'):
+ """Upsamples the given inputs.
+ Args:
+ net: A `Tensor` of size [batch_size, height, width, filters].
+ num_outputs: The number of output filters.
+ kernel_size: A list of 2 scalars or a 1x2 `Tensor` indicating the scale,
+ relative to the inputs, of the output dimensions. For example, if kernel
+ size is [2, 3], then the output height and width will be twice and three
+ times the input size.
+ method: The upsampling method.
+ Returns:
+ An `Tensor` which was upsampled using the specified method.
+ Raises:
+ ValueError: if `method` is not recognized.
+ """
+ net_shape = tf.shape(net)
+ height = net_shape[1]
+ width = net_shape[2]
+
+ if method == 'nn_upsample_conv':
+ net = tf.image.resize_nearest_neighbor(
+ net, [kernel_size[0] * height, kernel_size[1] * width])
+ net = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None)
+ elif method == 'conv2d_transpose':
+ net = layers.conv2d_transpose(
+ net, num_outputs, [4, 4], stride=kernel_size, activation_fn=None)
+ else:
+ raise ValueError('Unknown method: [%s]', method)
+
+ return net
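+ # e.g. upsample(net, 64, [2, 2]) resizes a [N, H, W, C] tensor to
+ # [N, 2H, 2W, 64]: nearest-neighbor interpolation to twice the spatial size,
+ # followed by a 4x4 convolution with 64 output channels (default method).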
+
+
+class Block(
+ collections.namedtuple('Block', ['num_filters', 'decoder_keep_prob'])):
+ """Represents a single block of encoder and decoder processing.
+ The Image-to-Image translation paper works a bit differently than the original
+ U-Net model. In particular, each block represents a single operation in the
+ encoder which is concatenated with the corresponding decoder representation.
+ A dropout layer follows the concatenation and convolution of the concatenated
+ features.
+ """
+ pass
+
+
+def _default_generator_blocks():
+ """Returns the default generator block definitions.
+ Returns:
+ A list of generator blocks.
+ """
+ return [
+ Block(64, 0.5),
+ Block(128, 0.5),
+ Block(256, 0.5),
+ Block(512, 0),
+ Block(512, 0),
+ Block(512, 0),
+ Block(512, 0),
+ ]
+
+
+def pix2pix_generator(net,
+ num_outputs,
+ blocks=None,
+ upsample_method='nn_upsample_conv',
+ is_training=False): # pylint: disable=unused-argument
+ """Defines the network architecture.
+ Args:
+ net: A `Tensor` of size [batch, height, width, channels]. Note that the
+ generator currently requires square inputs (e.g. height=width).
+ num_outputs: The number of (per-pixel) outputs.
+ blocks: A list of generator blocks or `None` to use the default generator
+ definition.
+ upsample_method: The method of upsampling images, one of 'nn_upsample_conv'
+ or 'conv2d_transpose'
+ is_training: Whether or not we're in training or testing mode.
+ Returns:
+ A `Tensor` representing the model output and a dictionary of model end
+ points.
+ Raises:
+ ValueError: if the input heights do not match their widths.
+ """
+ end_points = {}
+
+ blocks = blocks or _default_generator_blocks()
+
+ input_size = net.get_shape().as_list()
+ height, width = input_size[1], input_size[2]
+ if height != width:
+ raise ValueError('The input height must match the input width.')
+
+ input_size[3] = num_outputs
+
+ upsample_fn = functools.partial(upsample, method=upsample_method)
+
+ encoder_activations = []
+
+ ###########
+ # Encoder #
+ ###########
+ with tf.variable_scope('encoder'):
+ with tf.contrib.framework.arg_scope(
+ [layers.conv2d],
+ kernel_size=[4, 4],
+ stride=2,
+ activation_fn=tf.nn.leaky_relu):
+
+ for block_id, block in enumerate(blocks):
+ # No normalizer for the first encoder layers as per 'Image-to-Image',
+ # Section 5.1.1
+ if block_id == 0:
+ # First layer doesn't use normalizer_fn
+ net = layers.conv2d(net, block.num_filters, normalizer_fn=None)
+ elif block_id < len(blocks) - 1:
+ net = layers.conv2d(net, block.num_filters)
+ else:
+ # Last layer doesn't use activation_fn nor normalizer_fn
+ net = layers.conv2d(
+ net, block.num_filters, activation_fn=None, normalizer_fn=None)
+
+ encoder_activations.append(net)
+ end_points['encoder%d' % block_id] = net
+
+ ###########
+ # Decoder #
+ ###########
+ reversed_blocks = list(blocks)
+ reversed_blocks.reverse()
+
+ with tf.variable_scope('decoder'):
+ # Dropout is used at both train and test time as per 'Image-to-Image',
+ # Section 2.1 (last paragraph).
+ with tf.contrib.framework.arg_scope([layers.dropout], is_training=is_training):
+
+ for block_id, block in enumerate(reversed_blocks):
+ if block_id > 0:
+ net = tf.concat([net, encoder_activations[-block_id - 1]], axis=3)
+
+ # The Relu comes BEFORE the upsample op:
+ net = tf.nn.relu(net)
+ net = upsample_fn(net, block.num_filters, [2, 2])
+ if block.decoder_keep_prob > 0:
+ net = layers.dropout(net, keep_prob=block.decoder_keep_prob)
+ end_points['decoder%d' % block_id] = net
+
+ with tf.variable_scope('output'):
+ logits = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None)
+ # print(logits)
+ # logits = tf.reshape(logits, input_size)
+
+ end_points['logits'] = logits
+ end_points['predictions'] = tf.tanh(logits)
+
+ return logits, end_points
+
+
+def pix2pix_discriminator(net, num_filters, padding=2, is_training=False):
+ """Creates the Image2Image Translation Discriminator.
+ Args:
+ net: A `Tensor` of size [batch_size, height, width, channels] representing
+ the input.
+ num_filters: A list of the filters in the discriminator. The length of the
+ list determines the number of layers in the discriminator.
+ padding: Amount of reflection padding applied before each convolution.
+ is_training: Whether or not the model is training or testing.
+ Returns:
+ A logits `Tensor` of size [batch_size, N, N, 1] where N is the number of
+ 'patches' we're attempting to discriminate and a dictionary of model end
+ points.
+ """
+ del is_training
+ end_points = {}
+
+ num_layers = len(num_filters)
+
+ def padded(net, scope):
+ if padding:
+ with tf.variable_scope(scope):
+ spatial_pad = tf.constant(
+ [[0, 0], [padding, padding], [padding, padding], [0, 0]],
+ dtype=tf.int32)
+ return tf.pad(net, spatial_pad, 'REFLECT')
+ else:
+ return net
+
+ with tf.contrib.framework.arg_scope(
+ [layers.conv2d],
+ kernel_size=[4, 4],
+ stride=2,
+ padding='valid',
+ activation_fn=tf.nn.leaky_relu):
+
+ # No normalization on the input layer.
+ net = layers.conv2d(
+ padded(net, 'conv0'), num_filters[0], normalizer_fn=None, scope='conv0')
+
+ end_points['conv0'] = net
+
+ for i in range(1, num_layers - 1):
+ net = layers.conv2d(
+ padded(net, 'conv%d' % i), num_filters[i], scope='conv%d' % i)
+ end_points['conv%d' % i] = net
+
+ # Stride 1 on the last layer.
+ net = layers.conv2d(
+ padded(net, 'conv%d' % (num_layers - 1)),
+ num_filters[-1],
+ stride=1,
+ scope='conv%d' % (num_layers - 1))
+ end_points['conv%d' % (num_layers - 1)] = net
+
+ # 1-dim logits, stride 1, no activation, no normalization.
+ logits = layers.conv2d(
+ padded(net, 'conv%d' % num_layers),
+ 1,
+ stride=1,
+ activation_fn=None,
+ normalizer_fn=None,
+ scope='conv%d' % num_layers)
+ end_points['logits'] = logits
+ end_points['predictions'] = tf.sigmoid(logits)
+ return logits, end_points
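+
+# A minimal usage sketch (hypothetical input): for frames of shape
+# [batch, 256, 256, 3] and num_filters=(128, 256, 512, 512), the returned
+# logits form a PatchGAN-style map of shape [batch, h', w', 1] and
+# end_points['predictions'] holds the corresponding sigmoid probabilities.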
diff --git a/Codes/requirements.txt b/Codes/requirements.txt
new file mode 100644
index 0000000..91d2206
--- /dev/null
+++ b/Codes/requirements.txt
@@ -0,0 +1,9 @@
+numpy==1.14.1
+scipy==1.0.0
+matplotlib==2.1.2
+tensorflow==1.4.1
+tensorflow_gpu==1.4.1
+Pillow==5.0.0
+pypng==0.0.18
+scikit_learn==0.19.1
+opencv-python==3.2.0.6
diff --git a/Codes/runner.sh b/Codes/runner.sh
new file mode 100644
index 0000000..f0b545f
--- /dev/null
+++ b/Codes/runner.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# /home/liuwen/ssd/datasets/ped2/training/frames
+# /home/liuwen/ssd/datasets/ped2/testing/frames
+
+python train.py --dataset ped2 \
+ --train_folder ../Data/ped2/training/frames \
+ --test_folder ../Data/ped2/testing/frames \
+ --gpu 0 \
+ --iters 80000
+
+
+python inference.py --dataset ped2 \
+ --test_folder /home/liuwen/ssd/datasets/ped2/testing/frames \
+ --gpu 3 \
+ --snapshot_dir models/pretrains/ped2
+
+
+python train.py --dataset avenue \
+ --train_folder ../Data/avenue/training/frames \
+ --test_folder ../Data/avenue/testing/frames \
+ --gpu 2 \
+ --iters 80000
+
+python inference.py --dataset avenue \
+ --test_folder ../Data/avenue/testing/frames \
+ --gpu 3
+
+
+python train.py --dataset ped1 \
+ --train_folder ../Data/ped1/training/frames \
+ --test_folder ../Data/ped1/testing/frames \
+ --gpu 2 \
+ --iters 80000
+
+python inference.py --dataset ped1 \
+ --test_folder ../Data/ped1/testing/frames \
+ --gpu 3
+
+python train.py --dataset ped1 \
+ --train_folder ../Data/ped1/training/frames \
+ --test_folder ../Data/ped1/testing/frames \
+ --gpu 0 \
+ --iters 80000 \
+ --config training_hyper_params/hyper_params_lp_0.ini
+
+python inference.py --dataset ped1 \
+ --test_folder ../Data/ped1/testing/frames \
+ --gpu 1 \
+ --config training_hyper_params/hyper_params_lp_0.ini
+
+
+python inference.py --dataset ped2 \
+ --test_folder /home/liuwen/ssd/datasets/ped2/testing/frames \
+ --gpu 1 \
+ --snapshot_dir models/pretrains/ped2
\ No newline at end of file
diff --git a/Codes/train.py b/Codes/train.py
new file mode 100644
index 0000000..42a8fc9
--- /dev/null
+++ b/Codes/train.py
@@ -0,0 +1,215 @@
+import tensorflow as tf
+import os
+
+from models import generator, discriminator, flownet, initialize_flownet
+from loss_functions import intensity_loss, gradient_loss
+from utils import DataLoader, load, save, psnr_error
+from constant import const
+
+
+os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
+os.environ['CUDA_VISIBLE_DEVICES'] = const.GPU
+
+dataset_name = const.DATASET
+train_folder = const.TRAIN_FOLDER
+test_folder = const.TEST_FOLDER
+
+batch_size = const.BATCH_SIZE
+iterations = const.ITERATIONS
+num_his = const.NUM_HIS
+height, width = 256, 256
+flow_height, flow_width = const.FLOW_HEIGHT, const.FLOW_WIDTH
+
+l_num = const.L_NUM
+alpha_num = const.ALPHA_NUM
+lam_lp = const.LAM_LP
+lam_gdl = const.LAM_GDL
+lam_adv = const.LAM_ADV
+lam_flow = const.LAM_FLOW
+adversarial = (lam_adv != 0)
+
+summary_dir = const.SUMMARY_DIR
+snapshot_dir = const.SNAPSHOT_DIR
+
+
+print(const)
+
+# define dataset
+with tf.name_scope('dataset'):
+ train_loader = DataLoader(train_folder, resize_height=height, resize_width=width)
+ train_dataset = train_loader(batch_size=batch_size, time_steps=num_his, num_pred=1)
+
+ train_it = train_dataset.make_one_shot_iterator()
+ train_videos_clips_tensor = train_it.get_next()
+ train_videos_clips_tensor.set_shape([batch_size, height, width, 3*(num_his + 1)])
+
+ train_inputs = train_videos_clips_tensor[..., 0:num_his*3]
+ train_gt = train_videos_clips_tensor[..., -3:]
+
+ print('train inputs = {}'.format(train_inputs))
+ print('train prediction gt = {}'.format(train_gt))
+
+ test_loader = DataLoader(test_folder, resize_height=height, resize_width=width)
+ test_dataset = test_loader(batch_size=batch_size, time_steps=num_his, num_pred=1)
+ test_it = test_dataset.make_one_shot_iterator()
+ test_videos_clips_tensor = test_it.get_next()
+ test_videos_clips_tensor.set_shape([batch_size, height, width, 3*(num_his + 1)])
+
+ test_inputs = test_videos_clips_tensor[..., 0:num_his*3]
+ test_gt = test_videos_clips_tensor[..., -3:]
+
+ print('test inputs = {}'.format(test_inputs))
+ print('test prediction gt = {}'.format(test_gt))
+
+# define training generator function
+with tf.variable_scope('generator', reuse=None):
+ print('training = {}'.format(tf.get_variable_scope().name))
+ train_outputs = generator(train_inputs, layers=4, output_channel=3)
+ train_psnr_error = psnr_error(gen_frames=train_outputs, gt_frames=train_gt)
+
+# define testing generator function
+with tf.variable_scope('generator', reuse=True):
+ print('testing = {}'.format(tf.get_variable_scope().name))
+ test_outputs = generator(test_inputs, layers=4, output_channel=3)
+ test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)
+
+
+# define intensity loss
+if lam_lp != 0:
+ lp_loss = intensity_loss(gen_frames=train_outputs, gt_frames=train_gt, l_num=l_num)
+else:
+ lp_loss = tf.constant(0.0, dtype=tf.float32)
+
+
+# define gdl loss
+if lam_gdl != 0:
+ gdl_loss = gradient_loss(gen_frames=train_outputs, gt_frames=train_gt, alpha=alpha_num)
+else:
+ gdl_loss = tf.constant(0.0, dtype=tf.float32)
+
+
+# define flow loss
+if lam_flow != 0:
+ train_gt_flow = flownet(input_a=train_inputs[..., -3:], input_b=train_gt,
+ height=flow_height, width=flow_width, reuse=None)
+ train_pred_flow = flownet(input_a=train_inputs[..., -3:], input_b=train_outputs,
+ height=flow_height, width=flow_width, reuse=True)
+ flow_loss = tf.reduce_mean(tf.abs(train_gt_flow - train_pred_flow))
+else:
+ flow_loss = tf.constant(0.0, dtype=tf.float32)
+
+
+# define adversarial loss
+if adversarial:
+ with tf.variable_scope('discriminator', reuse=None):
+ real_logits, real_outputs = discriminator(inputs=train_gt)
+ with tf.variable_scope('discriminator', reuse=True):
+ fake_logits, fake_outputs = discriminator(inputs=train_outputs)
+
+ print('real_outputs = {}'.format(real_outputs))
+ print('fake_outputs = {}'.format(fake_outputs))
+
+ adv_loss = tf.reduce_mean(tf.square(fake_outputs - 1) / 2)
+ dis_loss = tf.reduce_mean(tf.square(real_outputs - 1) / 2) + tf.reduce_mean(tf.square(fake_outputs) / 2)
+else:
+ adv_loss = tf.constant(0.0, dtype=tf.float32)
+ dis_loss = tf.constant(0.0, dtype=tf.float32)
+
+
+with tf.name_scope('training'):
+ g_loss = tf.add_n([lp_loss * lam_lp, gdl_loss * lam_gdl, adv_loss * lam_adv, flow_loss * lam_flow], name='g_loss')
+
+ g_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='g_step')
+ g_lrate = tf.train.piecewise_constant(g_step, boundaries=const.LRATE_G_BOUNDARIES, values=const.LRATE_G)
+ g_optimizer = tf.train.AdamOptimizer(learning_rate=g_lrate, name='g_optimizer')
+ g_vars = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
+
+ g_train_op = g_optimizer.minimize(g_loss, global_step=g_step, var_list=g_vars, name='g_train_op')
+
+ if adversarial:
+ # training discriminator
+ d_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='d_step')
+ d_lrate = tf.train.piecewise_constant(d_step, boundaries=const.LRATE_D_BOUNDARIES, values=const.LRATE_D)
+ d_optimizer = tf.train.AdamOptimizer(learning_rate=d_lrate, name='d_optimizer')
+ d_vars = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
+
+ d_train_op = d_optimizer.minimize(dis_loss, global_step=d_step, var_list=d_vars, name='d_optimizer')
+ else:
+ d_step = None
+ d_lrate = None
+ d_train_op = None
+
+# add all to summaries
+tf.summary.scalar(tensor=train_psnr_error, name='train_psnr_error')
+tf.summary.scalar(tensor=test_psnr_error, name='test_psnr_error')
+tf.summary.scalar(tensor=g_loss, name='g_loss')
+tf.summary.scalar(tensor=adv_loss, name='adv_loss')
+tf.summary.scalar(tensor=dis_loss, name='dis_loss')
+tf.summary.image(tensor=train_outputs, name='train_outputs')
+tf.summary.image(tensor=train_gt, name='train_gt')
+tf.summary.image(tensor=test_outputs, name='test_outputs')
+tf.summary.image(tensor=test_gt, name='test_gt')
+summary_op = tf.summary.merge_all()
+
+config = tf.ConfigProto()
+config.gpu_options.allow_growth = True
+with tf.Session(config=config) as sess:
+ # summaries
+ summary_writer = tf.summary.FileWriter(summary_dir, graph=sess.graph)
+
+ # initialize weights
+ sess.run(tf.global_variables_initializer())
+ print('Init successfully!')
+
+ if lam_flow != 0:
+ # initialize flownet
+ initialize_flownet(sess, const.FLOWNET_CHECKPOINT)
+
+ # tf saver
+ saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
+ restore_var = [v for v in tf.global_variables()]
+ loader = tf.train.Saver(var_list=restore_var)
+ if os.path.isdir(snapshot_dir):
+ ckpt = tf.train.get_checkpoint_state(snapshot_dir)
+ if ckpt and ckpt.model_checkpoint_path:
+ load(loader, sess, ckpt.model_checkpoint_path)
+ else:
+ print('No checkpoint file found.')
+ else:
+ load(loader, sess, snapshot_dir)
+
+ _step, _loss, _summaries = 0, None, None
+ while _step < iterations:
+ try:
+ if adversarial:
+ print('Training discriminator...')
+ _, _d_lr, _d_step, _dis_loss = sess.run([d_train_op, d_lrate, d_step, dis_loss])
+ else:
+ _d_step = 0
+ _d_lr = 0
+ _dis_loss = 0
+
+ print('Training generator...')
+ _, _g_lr, _step, _lp_loss, _gdl_loss, _adv_loss, _flow_loss, _g_loss, _train_psnr, _summaries = sess.run(
+ [g_train_op, g_lrate, g_step, lp_loss, gdl_loss, adv_loss, flow_loss, g_loss, train_psnr_error, summary_op])
+
+ if _step % 10 == 0:
+ print('DiscriminatorModel: Step {} | Global Loss: {:.6f}, lr = {:.6f}'.format(_d_step, _dis_loss, _d_lr))
+ print('GeneratorModel : Step {}, lr = {:.6f}'.format(_step, _g_lr))
+ print(' Global Loss : ', _g_loss)
+ print(' intensity Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_lp_loss, lam_lp, _lp_loss * lam_lp))
+ print(' gradient Loss : ({:.4f} * {:.4f} = {:.4f})'.format( _gdl_loss, lam_gdl, _gdl_loss * lam_gdl))
+ print(' adversarial Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_adv_loss, lam_adv, _adv_loss * lam_adv))
+ print(' flownet Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_flow_loss, lam_flow, _flow_loss * lam_flow))
+ print(' PSNR Error : ', _train_psnr)
+ if _step % 100 == 0:
+ summary_writer.add_summary(_summaries, global_step=_step)
+ print('Save summaries...')
+
+ if _step % 1000 == 0:
+ save(saver, sess, snapshot_dir, _step)
+
+ except tf.errors.OutOfRangeError:
+ print('Finish successfully!')
+ save(saver, sess, snapshot_dir, _step)
+ break
diff --git a/Codes/training_hyper_params/hyper_params.ini b/Codes/training_hyper_params/hyper_params.ini
new file mode 100644
index 0000000..99dbf00
--- /dev/null
+++ b/Codes/training_hyper_params/hyper_params.ini
@@ -0,0 +1,103 @@
+[ped2]
+# for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+L_NUM = 2
+# the power to which each gradient term is raised in GDL loss
+ALPHA_NUM = 1
+# the percentage of the adversarial loss to use in the combined loss
+LAM_ADV = 0.05
+# the percentage of the lp loss to use in the combined loss
+LAM_LP = 1
+# the percentage of the GDL loss to use in the combined loss
+LAM_GDL = 1
+# the percentage of the optical flow (frame difference) loss to use in the combined loss
+LAM_FLOW = 2
+
+LRATE_G = [0.0001, 0.00001]
+LRATE_G_BOUNDARIES = [7000]
+
+LRATE_D = [0.00001, 0.000001]
+LRATE_D_BOUNDARIES = [7000]
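+# i.e. the generator lr starts at 0.0001 and drops to 0.00001 after step 7000,
+# and the discriminator lr drops from 0.00001 to 0.000001 at the same step
+# (consumed via tf.train.piecewise_constant in train.py).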
+
+[ped1]
+# for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+L_NUM = 2
+# the power to which each gradient term is raised in GDL loss
+ALPHA_NUM = 1
+# the percentage of the adversarial loss to use in the combined loss
+LAM_ADV = 0.05
+# the percentage of the lp loss to use in the combined loss
+LAM_LP = 1
+# the percentage of the GDL loss to use in the combined loss
+LAM_GDL = 1
+# the percentage of the optical flow (frame difference) loss to use in the combined loss
+LAM_FLOW = 0.01
+
+LRATE_G = [0.0001, 0.00001]
+LRATE_G_BOUNDARIES = [40000]
+
+LRATE_D = [0.00001, 0.000001]
+LRATE_D_BOUNDARIES = [40000]
+
+
+[avenue]
+# for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+L_NUM = 2
+# the power to which each gradient term is raised in GDL loss
+ALPHA_NUM = 1
+# the percentage of the adversarial loss to use in the combined loss
+LAM_ADV = 0.05
+# the percentage of the lp loss to use in the combined loss,
+# we found that a smaller lp weight works slightly better on avenue, but the difference is small.
+LAM_LP = 0
+# the percentage of the GDL loss to use in the combined loss
+LAM_GDL = 1
+# the percentage of the optical flow (frame difference) loss to use in the combined loss
+LAM_FLOW = 2
+
+LRATE_G = [0.0002, 0.00002]
+LRATE_G_BOUNDARIES = [100000]
+
+LRATE_D = [0.00002, 0.000002]
+LRATE_D_BOUNDARIES = [100000]
+
+
+[shanghaitech]
+# for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+L_NUM = 2
+# the power to which each gradient term is raised in GDL loss
+ALPHA_NUM = 1
+# the percentage of the adversarial loss to use in the combined loss
+LAM_ADV = 0.05
+# the percentage of the lp loss to use in the combined loss
+LAM_LP = 1
+# the percentage of the GDL loss to use in the combined loss
+LAM_GDL = 1
+# the percentage of the optical flow (frame difference) loss to use in the combined loss
+LAM_FLOW = 2
+
+LRATE_G = [0.0002, 0.00002]
+LRATE_G_BOUNDARIES = [50000]
+
+LRATE_D = [0.00002, 0.000002]
+LRATE_D_BOUNDARIES = [50000]
+
+
+[toydata]
+# for lp loss, e.g. 1 or 2 for l1 and l2 loss, respectively
+L_NUM = 2
+# the power to which each gradient term is raised in GDL loss
+ALPHA_NUM = 1
+# the percentage of the adversarial loss to use in the combined loss
+LAM_ADV = 0.05
+# the percentage of the lp loss to use in the combined loss
+LAM_LP = 1
+# the percentage of the GDL loss to use in the combined loss
+LAM_GDL = 1
+# the percentage of the optical flow (frame difference) loss to use in the combined loss
+LAM_FLOW = 2
+
+LRATE_G = [0.0001, 0.00001]
+LRATE_G_BOUNDARIES = [7000]
+
+LRATE_D = [0.00001, 0.000001]
+LRATE_D_BOUNDARIES = [7000]
diff --git a/Codes/unet.py b/Codes/unet.py
new file mode 100644
index 0000000..ac4c6aa
--- /dev/null
+++ b/Codes/unet.py
@@ -0,0 +1,42 @@
+import tensorflow as tf
+from tensorflow.contrib.layers import conv2d, max_pool2d, conv2d_transpose
+
+
+def unet(inputs, layers, features_root=64, filter_size=3, pool_size=2, output_channel=1):
+ """
+ :param inputs: input tensor, shape[None, height, width, channel]
+ :param layers: number of layers
+ :param features_root: number of features in the first layer
+ :param filter_size: size of each conv layer
+ :param pool_size: size of each max pooling layer
+ :param output_channel: number of channel for output tensor
+ :return: a tensor, shape[None, height, width, output_channel]
+ """
+
+ in_node = inputs
+ conv = []
+ for layer in range(0, layers):
+ features = 2**layer*features_root
+
+ conv1 = conv2d(inputs=in_node, num_outputs=features, kernel_size=filter_size)
+ conv2 = conv2d(inputs=conv1, num_outputs=features, kernel_size=filter_size)
+ conv.append(conv2)
+
+ if layer < layers - 1:
+ in_node = max_pool2d(inputs=conv2, kernel_size=pool_size, padding='SAME')
+ # in_node = conv2d(inputs=conv2, num_outputs=features, kernel_size=filter_size, stride=2)
+
+ in_node = conv[-1]
+
+ for layer in range(layers-2, -1, -1):
+ features = 2**(layer+1)*features_root
+
+ h_deconv = conv2d_transpose(inputs=in_node, num_outputs=features//2, kernel_size=pool_size, stride=pool_size)
+ h_deconv_concat = tf.concat([conv[layer], h_deconv], axis=3)
+
+ conv1 = conv2d(inputs=h_deconv_concat, num_outputs=features//2, kernel_size=filter_size)
+ in_node = conv2d(inputs=conv1, num_outputs=features//2, kernel_size=filter_size)
+
+ output = conv2d(inputs=in_node, num_outputs=output_channel, kernel_size=filter_size, activation_fn=None)
+ output = tf.tanh(output)
+ return output
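+
+# A minimal usage sketch (hypothetical shapes): for stacked input frames of
+# shape [batch, 256, 256, 3 * num_his], unet(inputs, layers=4, output_channel=3)
+# returns a predicted frame of shape [batch, 256, 256, 3] with values in
+# [-1, 1] (tanh output), as used by generator() in models.py.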
diff --git a/Codes/utils.py b/Codes/utils.py
new file mode 100644
index 0000000..efeab8e
--- /dev/null
+++ b/Codes/utils.py
@@ -0,0 +1,227 @@
+import tensorflow as tf
+import numpy as np
+from collections import OrderedDict
+import os
+import glob
+import cv2
+
+
+rng = np.random.RandomState(2017)
+
+
+def np_load_frame(filename, resize_height, resize_width):
+ image_decoded = cv2.imread(filename)
+ image_resized = cv2.resize(image_decoded, (resize_width, resize_height))
+ image_resized = image_resized.astype(dtype=np.float32)
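+ # scale pixel values from [0, 255] to [-1, 1]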
+ image_resized = (image_resized / 127.5) - 1.0
+ return image_resized
+
+
+class DataLoader(object):
+ def __init__(self, video_folder, resize_height=256, resize_width=256):
+ self.dir = video_folder
+ self.videos = {}
+ self._resize_height = resize_height
+ self._resize_width = resize_width
+ self.setup()
+
+ def __call__(self, batch_size, time_steps, num_pred=1):
+ video_info_list = list(self.videos.values())
+ num_videos = len(video_info_list)
+
+ clip_length = time_steps + num_pred
+ resize_height, resize_width = self._resize_height, self._resize_width
+
+ def video_clip_generator():
+ v_id = -1
+ while True:
+ v_id = (v_id + 1) % num_videos
+
+ video_info = video_info_list[v_id]
+ start = rng.randint(0, video_info['length'] - clip_length)
+ video_clip = []
+ for frame_id in range(start, start + clip_length):
+ video_clip.append(np_load_frame(video_info['frame'][frame_id], resize_height, resize_width))
+ video_clip = np.concatenate(video_clip, axis=2)
+
+ yield video_clip
+
+ # video clip paths
+ dataset = tf.data.Dataset.from_generator(generator=video_clip_generator,
+ output_types=tf.float32,
+ output_shapes=[resize_height, resize_width, clip_length * 3])
+ print('generator dataset, {}'.format(dataset))
+ dataset = dataset.prefetch(buffer_size=1000)
+ dataset = dataset.shuffle(buffer_size=1000).batch(batch_size)
+ print('epoch dataset, {}'.format(dataset))
+
+ return dataset
+
+ def __getitem__(self, video_name):
+ assert video_name in self.videos.keys(), 'video = {} is not in {}!'.format(video_name, self.videos.keys())
+ return self.videos[video_name]
+
+ def setup(self):
+ videos = glob.glob(os.path.join(self.dir, '*'))
+ for video in sorted(videos):
+ video_name = video.split('/')[-1]
+ self.videos[video_name] = {}
+ self.videos[video_name]['path'] = video
+ self.videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
+ self.videos[video_name]['frame'].sort()
+ self.videos[video_name]['length'] = len(self.videos[video_name]['frame'])
+
+ def get_video_clips(self, video, start, end):
+ # assert video in self.videos, 'video = {} must in {}!'.format(video, self.videos.keys())
+ # assert start >= 0, 'start = {} must >=0!'.format(start)
+ # assert end <= self.videos[video]['length'], 'end = {} must <= {}'.format(video, self.videos[video]['length'])
+
+ batch = []
+ for i in range(start, end):
+ image = np_load_frame(self.videos[video]['frame'][i], self._resize_height, self._resize_width)
+ batch.append(image)
+
+ return np.concatenate(batch, axis=2)
+
+ # def get_video_clips(self, video_name, start, end):
+ # video_idx = np.arange(start, end)
+ # video_clip = np.empty(shape=[self._resize_height, self._resize_height, 3*len(video_idx)], dtype=np.float32)
+ # for idx, v_idx in enumerate(video_idx):
+ # filename = self.videos[video_name]['frame'][v_idx]
+ # video_clip[..., idx*3:(idx+1)*3] = np_load_frame(filename, self._resize_height, self._resize_width)
+ #
+ # return video_clip
+
+
+def log10(t):
+ """
+ Calculates the base-10 log of each element in t.
+
+ @param t: The tensor from which to calculate the base-10 log.
+
+ @return: A tensor with the base-10 log of each element in t.
+ """
+
+ numerator = tf.log(t)
+ denominator = tf.log(tf.constant(10, dtype=numerator.dtype))
+ return numerator / denominator
+
+
+def psnr_error(gen_frames, gt_frames):
+ """
+ Computes the Peak Signal to Noise Ratio error between the generated images and the ground
+ truth images.
+
+ @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the
+ generator model.
+ @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for
+ each frame in gen_frames.
+
+ @return: A scalar tensor. The mean Peak Signal to Noise Ratio error over each frame in the
+ batch.
+ """
+ shape = tf.shape(gen_frames)
+ num_pixels = tf.to_float(shape[1] * shape[2] * shape[3])
+ gt_frames = (gt_frames + 1.0) / 2.0
+ gen_frames = (gen_frames + 1.0) / 2.0
+ square_diff = tf.square(gt_frames - gen_frames)
+
+ batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(square_diff, [1, 2, 3])))
+ return tf.reduce_mean(batch_errors)
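+ # e.g. with frames rescaled to [0, 1], a per-frame mean squared error of 0.01
+ # gives 10 * log10(1 / 0.01) = 20 dB for that frame.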
+
+
+def sharp_diff_error(gen_frames, gt_frames, channels=3):
+ """
+ Computes the Sharpness Difference error between the generated images and the ground truth
+ images.
+
+ @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the
+ generator model.
+ @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for
+ each frame in gen_frames.
+ @param channels: The number of channels, 3 is RGB and 1 is Gray, default is 3.
+
+ @return: A scalar tensor. The Sharpness Difference error over each frame in the batch.
+ """
+ shape = tf.shape(gen_frames)
+ num_pixels = tf.to_float(shape[1] * shape[2] * shape[3])
+
+ # gradient difference
+ # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
+ # TODO: Could this be simplified with one filter [[-1, 2], [0, -1]]?
+ pos = tf.constant(np.identity(channels), dtype=tf.float32)
+ neg = -1 * pos
+ filter_x = tf.expand_dims(tf.stack([neg, pos]), 0) # [-1, 1]
+ filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)]) # [[1],[-1]]
+ strides = [1, 1, 1, 1] # stride of (1, 1)
+ padding = 'SAME'
+
+ gen_dx = tf.abs(tf.nn.conv2d(gen_frames, filter_x, strides, padding=padding))
+ gen_dy = tf.abs(tf.nn.conv2d(gen_frames, filter_y, strides, padding=padding))
+ gt_dx = tf.abs(tf.nn.conv2d(gt_frames, filter_x, strides, padding=padding))
+ gt_dy = tf.abs(tf.nn.conv2d(gt_frames, filter_y, strides, padding=padding))
+
+ gen_grad_sum = gen_dx + gen_dy
+ gt_grad_sum = gt_dx + gt_dy
+
+ grad_diff = tf.abs(gt_grad_sum - gen_grad_sum)
+
+ batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(grad_diff, [1, 2, 3])))
+ return tf.reduce_mean(batch_errors)
+
+
+def diff_mask(gen_frames, gt_frames, min_value=-1, max_value=1):
+ # normalize to [0, 1]
+ delta = max_value - min_value
+ gen_frames = (gen_frames - min_value) / delta
+ gt_frames = (gt_frames - min_value) / delta
+
+ gen_gray_frames = tf.image.rgb_to_grayscale(gen_frames)
+ gt_gray_frames = tf.image.rgb_to_grayscale(gt_frames)
+
+ diff = tf.abs(gen_gray_frames - gt_gray_frames)
+ return diff
+
+
+def load(saver, sess, ckpt_path):
+ saver.restore(sess, ckpt_path)
+ print("Restored model parameters from {}".format(ckpt_path))
+
+
+def save(saver, sess, logdir, step):
+ model_name = 'model.ckpt'
+ checkpoint_path = os.path.join(logdir, model_name)
+ if not os.path.exists(logdir):
+ os.makedirs(logdir)
+ saver.save(sess, checkpoint_path, global_step=step)
+ print('The checkpoint has been created.')
+
+
+# if __name__ == '__main__':
+# os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID"
+# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+#
+# data_loader = DataLoader('/home/liuwen/ssd/datasets/avenue/training/frames')
+# dataset, epoch_size = data_loader(10, 4, 1, 3, 1)
+#
+# # debug
+# iteration = dataset.make_one_shot_iterator()
+# batch_video_clip_tensor = iteration.get_next()
+#
+# config = tf.ConfigProto()
+# config.gpu_options.allow_growth = True
+# with tf.Session(config=config) as sess:
+# # batch_video_clip = sess.run(next(it))
+#
+# for i in range(100):
+# batch_video_clip = sess.run(batch_video_clip_tensor)
+# # print(batch_video_clip.shape)
+#
+# for vid, video_clip in enumerate(batch_video_clip):
+# for fid, frame in enumerate(video_clip):
+# print(i, vid, fid)
+# cv2.imshow('visualization', frame + 0.5)
+# cv2.waitKey(100)
+
+
+