Diffstat (limited to 'Codes')
82 files changed, 8146 insertions, 0 deletions
diff --git a/Codes/constant.py b/Codes/constant.py new file mode 100644 index 0000000..eafeab9 --- /dev/null +++ b/Codes/constant.py @@ -0,0 +1,153 @@ +import os +import argparse +import configparser + + +def get_dir(directory): + """ + get the directory, if no such directory, then make it. + + @param directory: The new directory. + """ + + if not os.path.exists(directory): + os.makedirs(directory) + + return directory + + +def parser_args(): + parser = argparse.ArgumentParser(description='Options to run the network.') + parser.add_argument('-g', '--gpu', type=str, default='0', + help='the device id of gpu.') + parser.add_argument('-i', '--iters', type=int, default=1, + help='set the number of iterations, default is 1') + parser.add_argument('-b', '--batch', type=int, default=4, + help='set the batch size, default is 4.') + parser.add_argument('--num_his', type=int, default=4, + help='set the time steps, default is 4.') + + parser.add_argument('-d', '--dataset', type=str, + help='the name of dataset.') + parser.add_argument('--train_folder', type=str, default='', + help='set the training folder path.') + parser.add_argument('--test_folder', type=str, default='', + help='set the testing folder path.') + + parser.add_argument('--config', type=str, default='training_hyper_params/hyper_params.ini', + help='the path of training_hyper_params, default is training_hyper_params/hyper_params.ini') + + parser.add_argument('--snapshot_dir', type=str, default='', + help='if it is folder, then it is the directory to save models, ' + 'if it is a specific model.ckpt-xxx, then the system will load it for testing.') + parser.add_argument('--summary_dir', type=str, default='', help='the directory to save summaries.') + parser.add_argument('--psnr_dir', type=str, default='', help='the directory to save psnrs results in testing.') + + parser.add_argument('--evaluate', type=str, default='compute_auc', + help='the evaluation metric, default is compute_auc') + + return parser.parse_args() + + +class Const(object): + class ConstError(TypeError): + pass + + class ConstCaseError(ConstError): + pass + + def __setattr__(self, name, value): + if name in self.__dict__: + raise self.ConstError("Can't change const.{}".format(name)) + if not name.isupper(): + raise self.ConstCaseError('const name {} is not all uppercase'.format(name)) + + self.__dict__[name] = value + + def __str__(self): + _str = '<================ Constants information ================>\n' + for name, value in self.__dict__.items(): + print(name, value) + _str += '\t{}\t{}\n'.format(name, value) + + return _str + + +args = parser_args() +const = Const() + +# inputs constants +const.DATASET = args.dataset +const.TRAIN_FOLDER = args.train_folder +const.TEST_FOLDER = args.test_folder + +const.GPU = args.gpu + +const.BATCH_SIZE = args.batch +const.NUM_HIS = args.num_his +const.ITERATIONS = args.iters + +const.EVALUATE = args.evaluate + +# network constants +const.HEIGHT = 256 +const.WIDTH = 256 +const.FLOWNET_CHECKPOINT = 'flownet2/checkpoints/FlowNetSD/flownet-SD.ckpt-0' +const.FLOW_HEIGHT = 384 +const.FLOW_WIDTH = 512 + +# set training hyper-parameters of different datasets +config = configparser.ConfigParser() +assert config.read(args.config) + +# for lp loss. 
e.g, 1 or 2 for l1 and l2 loss, respectively) +const.L_NUM = config.getint(const.DATASET, 'L_NUM') +# the power to which each gradient term is raised in GDL loss +const.ALPHA_NUM = config.getint(const.DATASET, 'ALPHA_NUM') +# the percentage of the adversarial loss to use in the combined loss +const.LAM_ADV = config.getfloat(const.DATASET, 'LAM_ADV') +# the percentage of the lp loss to use in the combined loss +const.LAM_LP = config.getfloat(const.DATASET, 'LAM_LP') +# the percentage of the GDL loss to use in the combined loss +const.LAM_GDL = config.getfloat(const.DATASET, 'LAM_GDL') +# the percentage of the different frame loss +const.LAM_FLOW = config.getfloat(const.DATASET, 'LAM_FLOW') + +# Learning rate of generator +const.LRATE_G = eval(config.get(const.DATASET, 'LRATE_G')) +const.LRATE_G_BOUNDARIES = eval(config.get(const.DATASET, 'LRATE_G_BOUNDARIES')) + +# Learning rate of discriminator +const.LRATE_D = eval(config.get(const.DATASET, 'LRATE_D')) +const.LRATE_D_BOUNDARIES = eval(config.get(const.DATASET, 'LRATE_D_BOUNDARIES')) + + +const.SAVE_DIR = '{dataset}_l_{L_NUM}_alpha_{ALPHA_NUM}_lp_{LAM_LP}_' \ + 'adv_{LAM_ADV}_gdl_{LAM_GDL}_flow_{LAM_FLOW}'.format(dataset=const.DATASET, + L_NUM=const.L_NUM, + ALPHA_NUM=const.ALPHA_NUM, + LAM_LP=const.LAM_LP, LAM_ADV=const.LAM_ADV, + LAM_GDL=const.LAM_GDL, LAM_FLOW=const.LAM_FLOW) + +if args.snapshot_dir: + # if the snapshot_dir is model.ckpt-xxx, which means it is the single model for testing. + if os.path.exists(args.snapshot_dir + '.meta') or os.path.exists(args.snapshot_dir + '.data-00000-of-00001') or \ + os.path.exists(args.snapshot_dir + '.index'): + const.SNAPSHOT_DIR = args.snapshot_dir + print(const.SNAPSHOT_DIR) + else: + const.SNAPSHOT_DIR = get_dir(os.path.join('models', const.SAVE_DIR + '_' + args.snapshot_dir)) +else: + const.SNAPSHOT_DIR = get_dir(os.path.join('models', const.SAVE_DIR)) + +if args.summary_dir: + const.SUMMARY_DIR = get_dir(os.path.join('summary', const.SAVE_DIR + '_' + args.summary_dir)) +else: + const.SUMMARY_DIR = get_dir(os.path.join('summary', const.SAVE_DIR)) + +if args.psnr_dir: + const.PSNR_DIR = get_dir(os.path.join('psnrs', const.SAVE_DIR + '_' + args.psnr_dir)) +else: + const.PSNR_DIR = get_dir(os.path.join('psnrs', const.SAVE_DIR)) + + diff --git a/Codes/evaluate.py b/Codes/evaluate.py new file mode 100644 index 0000000..2bce871 --- /dev/null +++ b/Codes/evaluate.py @@ -0,0 +1,576 @@ +import numpy as np +import scipy.io as scio +import os +import argparse +import pickle +from sklearn import metrics +import json +import socket + + +# data folder contain all datasets, such as ped1, ped2, avenue, shanghaitech, etc +# DATA_DIR = '../Data' +hostname = socket.gethostname() +if hostname == 'dl-T8520-G10': # 119 + DATA_DIR = '/home/liuwen/ssd/datasets' +elif hostname == 'admin' or hostname == 'compute101' or hostname == 'compute113' or hostname == 'compute106' \ + or hostname == 'compute107' or hostname == 'compute114': # node02 + DATA_DIR = '/home/luowx/liuwen/datasets' +elif hostname == 'gpu13' or 'gpu14': + DATA_DIR = '/public/home/gaoshenghua/liuwen/datasets' +else: + # raise NotImplementedError('Not found this machine {}!'.format(hostname)) + DATA_DIR = '../Data' + + +# normalize scores in each sub video +NORMALIZE = True + +# number of history frames, since in prediction based method, the first 4 frames can not be predicted, so that +# the first 4frames are undecidable, we just ignore the first 4 frames +DECIDABLE_IDX = 4 + + +def parser_args(): + parser = 
argparse.ArgumentParser(description='evaluating the model, computing the roc/auc.') + + parser.add_argument('-f', '--file', type=str, help='the path of loss file.') + parser.add_argument('-t', '--type', type=str, default='compute_auc', + help='the type of evaluation, choosing type is: plot_roc, compute_auc, ' + 'test_func\n, the default type is compute_auc') + return parser.parse_args() + + +class RecordResult(object): + def __init__(self, fpr=None, tpr=None, auc=-np.inf, dataset=None, loss_file=None): + self.fpr = fpr + self.tpr = tpr + self.auc = auc + self.dataset = dataset + self.loss_file = loss_file + + def __lt__(self, other): + return self.auc < other.auc + + def __gt__(self, other): + return self.auc > other.auc + + def __str__(self): + return 'dataset = {}, loss file = {}, auc = {}'.format(self.dataset, self.loss_file, self.auc) + + +class GroundTruthLoader(object): + AVENUE = 'avenue' + PED1 = 'ped1' + PED1_PIXEL_SUBSET = 'ped1_pixel_subset' + PED2 = 'ped2' + ENTRANCE = 'enter' + EXIT = 'exit' + SHANGHAITECH = 'shanghaitech' + SHANGHAITECH_LABEL_PATH = os.path.join(DATA_DIR, 'shanghaitech/testing/test_frame_mask') + TOY_DATA = 'toydata' + TOY_DATA_LABEL_PATH = os.path.join(DATA_DIR, TOY_DATA, 'toydata.json') + + NAME_MAT_MAPPING = { + AVENUE: os.path.join(DATA_DIR, 'avenue/avenue.mat'), + PED1: os.path.join(DATA_DIR, 'ped1/ped1.mat'), + PED2: os.path.join(DATA_DIR, 'ped2/ped2.mat'), + ENTRANCE: os.path.join(DATA_DIR, 'enter/enter.mat'), + EXIT: os.path.join(DATA_DIR, 'exit/exit.mat') + } + + NAME_FRAMES_MAPPING = { + AVENUE: os.path.join(DATA_DIR, 'avenue/testing/frames'), + PED1: os.path.join(DATA_DIR, 'ped1/testing/frames'), + PED2: os.path.join(DATA_DIR, 'ped2/testing/frames'), + ENTRANCE: os.path.join(DATA_DIR, 'enter/testing/frames'), + EXIT: os.path.join(DATA_DIR, 'exit/testing/frames') + } + + def __init__(self, mapping_json=None): + """ + Initial a ground truth loader, which loads the ground truth with given dataset name. + + :param mapping_json: the mapping from dataset name to the path of ground truth. + """ + + if mapping_json is not None: + with open(mapping_json, 'rb') as json_file: + self.mapping = json.load(json_file) + else: + self.mapping = GroundTruthLoader.NAME_MAT_MAPPING + + def __call__(self, dataset): + """ get the ground truth by provided the name of dataset. + + :type dataset: str + :param dataset: the name of dataset. + :return: np.ndarray, shape(#video) + np.array[0] contains all the start frame and end frame of abnormal events of video 0, + and its shape is (#frapsnr, ) + """ + + if dataset == GroundTruthLoader.SHANGHAITECH: + gt = self.__load_shanghaitech_gt() + elif dataset == GroundTruthLoader.TOY_DATA: + gt = self.__load_toydata_gt() + else: + gt = self.__load_ucsd_avenue_subway_gt(dataset) + return gt + + def __load_ucsd_avenue_subway_gt(self, dataset): + assert dataset in self.mapping, 'there is no dataset named {} \n Please check {}' \ + .format(dataset, GroundTruthLoader.NAME_MAT_MAPPING.keys()) + + mat_file = self.mapping[dataset] + abnormal_events = scio.loadmat(mat_file, squeeze_me=True)['gt'] + + if abnormal_events.ndim == 2: + abnormal_events = abnormal_events.reshape(-1, abnormal_events.shape[0], abnormal_events.shape[1]) + + num_video = abnormal_events.shape[0] + dataset_video_folder = GroundTruthLoader.NAME_FRAMES_MAPPING[dataset] + video_list = os.listdir(dataset_video_folder) + video_list.sort() + + assert num_video == len(video_list), 'ground true does not match the number of testing videos. 
{} != {}' \ + .format(num_video, len(video_list)) + + # get the total frames of sub video + def get_video_length(sub_video_number): + # video_name = video_name_template.format(sub_video_number) + video_name = os.path.join(dataset_video_folder, video_list[sub_video_number]) + assert os.path.isdir(video_name), '{} is not directory!'.format(video_name) + + length = len(os.listdir(video_name)) + + return length + + # need to test [].append, or np.array().append(), which one is faster + gt = [] + for i in range(num_video): + length = get_video_length(i) + + sub_video_gt = np.zeros((length,), dtype=np.int8) + sub_abnormal_events = abnormal_events[i] + if sub_abnormal_events.ndim == 1: + sub_abnormal_events = sub_abnormal_events.reshape((sub_abnormal_events.shape[0], -1)) + + _, num_abnormal = sub_abnormal_events.shape + + for j in range(num_abnormal): + # (start - 1, end - 1) + start = sub_abnormal_events[0, j] - 1 + end = sub_abnormal_events[1, j] + + sub_video_gt[start: end] = 1 + + gt.append(sub_video_gt) + + return gt + + @staticmethod + def __load_shanghaitech_gt(): + video_path_list = os.listdir(GroundTruthLoader.SHANGHAITECH_LABEL_PATH) + video_path_list.sort() + + gt = [] + for video in video_path_list: + # print(os.path.join(GroundTruthLoader.SHANGHAITECH_LABEL_PATH, video)) + gt.append(np.load(os.path.join(GroundTruthLoader.SHANGHAITECH_LABEL_PATH, video))) + + return gt + + @staticmethod + def __load_toydata_gt(): + with open(GroundTruthLoader.TOY_DATA_LABEL_PATH, 'r') as gt_file: + gt_dict = json.load(gt_file) + + gt = [] + for video, video_info in gt_dict.items(): + length = video_info['length'] + video_gt = np.zeros((length,), dtype=np.uint8) + sub_gt = np.array(np.matrix(video_info['gt'])) + + for anomaly in sub_gt: + start = anomaly[0] + end = anomaly[1] + 1 + video_gt[start: end] = 1 + gt.append(video_gt) + return gt + + @staticmethod + def get_pixel_masks_file_list(dataset): + # pixel mask folder + pixel_mask_folder = os.path.join(DATA_DIR, dataset, 'pixel_masks') + pixel_mask_file_list = os.listdir(pixel_mask_folder) + pixel_mask_file_list.sort() + + # get all testing videos + dataset_video_folder = GroundTruthLoader.NAME_FRAMES_MAPPING[dataset] + video_list = os.listdir(dataset_video_folder) + video_list.sort() + + # get all testing video names with pixel masks + pixel_video_ids = [] + ids = 0 + for pixel_mask_name in pixel_mask_file_list: + while ids < len(video_list): + if video_list[ids] + '.npy' == pixel_mask_name: + pixel_video_ids.append(ids) + ids += 1 + break + else: + ids += 1 + + assert len(pixel_video_ids) == len(pixel_mask_file_list) + + for i in range(len(pixel_mask_file_list)): + pixel_mask_file_list[i] = os.path.join(pixel_mask_folder, pixel_mask_file_list[i]) + + return pixel_mask_file_list, pixel_video_ids + + +def load_psnr_gt(loss_file): + with open(loss_file, 'rb') as reader: + # results { + # 'dataset': the name of dataset + # 'psnr': the psnr of each testing videos, + # } + + # psnr_records['psnr'] is np.array, shape(#videos) + # psnr_records[0] is np.array ------> 01.avi + # psnr_records[1] is np.array ------> 02.avi + # ...... 
+ # psnr_records[n] is np.array ------> xx.avi + + results = pickle.load(reader) + + dataset = results['dataset'] + psnr_records = results['psnr'] + + num_videos = len(psnr_records) + + # load ground truth + gt_loader = GroundTruthLoader() + gt = gt_loader(dataset=dataset) + + assert num_videos == len(gt), 'the number of saved videos does not match the ground truth, {} != {}' \ + .format(num_videos, len(gt)) + + return dataset, psnr_records, gt + + +def load_psnr_gt_flow(loss_file): + with open(loss_file, 'rb') as reader: + # results { + # 'dataset': the name of dataset + # 'psnr': the psnr of each testing videos, + # } + + # psnr_records['psnr'] is np.array, shape(#videos) + # psnr_records[0] is np.array ------> 01.avi + # psnr_records[1] is np.array ------> 02.avi + # ...... + # psnr_records[n] is np.array ------> xx.avi + + results = pickle.load(reader) + + dataset = results['dataset'] + psnrs = results['psnr'] + flows = results['flow'] + + num_videos = len(psnrs) + + # load ground truth + gt_loader = GroundTruthLoader() + gt = gt_loader(dataset=dataset) + + assert num_videos == len(gt), 'the number of saved videos does not match the ground truth, {} != {}' \ + .format(num_videos, len(gt)) + + return dataset, psnrs, flows, gt + + +def load_psnr(loss_file): + """ + load image psnr or optical flow psnr. + :param loss_file: loss file path + :return: + """ + with open(loss_file, 'rb') as reader: + # results { + # 'dataset': the name of dataset + # 'psnr': the psnr of each testing videos, + # } + + # psnr_records['psnr'] is np.array, shape(#videos) + # psnr_records[0] is np.array ------> 01.avi + # psnr_records[1] is np.array ------> 02.avi + # ...... + # psnr_records[n] is np.array ------> xx.avi + + results = pickle.load(reader) + psnrs = results['psnr'] + return psnrs + + +def get_scores_labels(loss_file): + # the name of dataset, loss, and ground truth + dataset, psnr_records, gt = load_psnr_gt(loss_file=loss_file) + + # the number of videos + num_videos = len(psnr_records) + + scores = np.array([], dtype=np.float32) + labels = np.array([], dtype=np.int8) + # video normalization + for i in range(num_videos): + distance = psnr_records[i] + + if NORMALIZE: + distance -= distance.min() # distances = (distance - min) / (max - min) + distance /= distance.max() + # distance = 1 - distance + + scores = np.concatenate((scores[:], distance[DECIDABLE_IDX:]), axis=0) + labels = np.concatenate((labels[:], gt[i][DECIDABLE_IDX:]), axis=0) + return dataset, scores, labels + + +def precision_recall_auc(loss_file): + if not os.path.isdir(loss_file): + loss_file_list = [loss_file] + else: + loss_file_list = os.listdir(loss_file) + loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list] + + optimal_results = RecordResult() + for sub_loss_file in loss_file_list: + dataset, scores, labels = get_scores_labels(sub_loss_file) + precision, recall, thresholds = metrics.precision_recall_curve(labels, scores, pos_label=0) + auc = metrics.auc(recall, precision) + + results = RecordResult(recall, precision, auc, dataset, sub_loss_file) + + if optimal_results < results: + optimal_results = results + + if os.path.isdir(loss_file): + print(results) + print('##### optimal result and model = {}'.format(optimal_results)) + return optimal_results + + +def cal_eer(fpr, tpr): + # makes fpr + tpr = 1 + eer = fpr[np.nanargmin(np.absolute((fpr + tpr - 1)))] + return eer + + +def compute_eer(loss_file): + if not os.path.isdir(loss_file): + loss_file_list = [loss_file] + else: + loss_file_list 
= os.listdir(loss_file) + loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list] + + optimal_results = RecordResult(auc=np.inf) + for sub_loss_file in loss_file_list: + dataset, scores, labels = get_scores_labels(sub_loss_file) + fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0) + eer = cal_eer(fpr, tpr) + + results = RecordResult(fpr, tpr, eer, dataset, sub_loss_file) + + if optimal_results > results: + optimal_results = results + + if os.path.isdir(loss_file): + print(results) + print('##### optimal result and model = {}'.format(optimal_results)) + return optimal_results + + +def compute_auc(loss_file): + if not os.path.isdir(loss_file): + loss_file_list = [loss_file] + else: + loss_file_list = os.listdir(loss_file) + loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list] + + optimal_results = RecordResult() + for sub_loss_file in loss_file_list: + # the name of dataset, loss, and ground truth + dataset, psnr_records, gt = load_psnr_gt(loss_file=sub_loss_file) + + # the number of videos + num_videos = len(psnr_records) + + scores = np.array([], dtype=np.float32) + labels = np.array([], dtype=np.int8) + # video normalization + for i in range(num_videos): + distance = psnr_records[i] + + if NORMALIZE: + distance -= distance.min() # distances = (distance - min) / (max - min) + distance /= distance.max() + # distance = 1 - distance + + scores = np.concatenate((scores, distance[DECIDABLE_IDX:]), axis=0) + labels = np.concatenate((labels, gt[i][DECIDABLE_IDX:]), axis=0) + + fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0) + auc = metrics.auc(fpr, tpr) + + results = RecordResult(fpr, tpr, auc, dataset, sub_loss_file) + + if optimal_results < results: + optimal_results = results + + if os.path.isdir(loss_file): + print(results) + print('##### optimal result and model = {}'.format(optimal_results)) + return optimal_results + + +def average_psnr(loss_file): + if not os.path.isdir(loss_file): + loss_file_list = [loss_file] + else: + loss_file_list = os.listdir(loss_file) + loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list] + + max_avg_psnr = -np.inf + max_file = '' + for file in loss_file_list: + psnr_records = load_psnr(file) + + psnr_records = np.concatenate(psnr_records, axis=0) + avg_psnr = np.mean(psnr_records) + if max_avg_psnr < avg_psnr: + max_avg_psnr = avg_psnr + max_file = file + print('{}, average psnr = {}'.format(file, avg_psnr)) + + print('max average psnr file = {}, psnr = {}'.format(max_file, max_avg_psnr)) + + +def calculate_psnr(loss_file): + optical_result = compute_auc(loss_file) + print('##### optimal result and model = {}'.format(optical_result)) + + mean_psnr = [] + for file in os.listdir(loss_file): + file = os.path.join(loss_file, file) + dataset, psnr_records, gt = load_psnr_gt(file) + + psnr_records = np.concatenate(psnr_records, axis=0) + gt = np.concatenate(gt, axis=0) + + mean_normal_psnr = np.mean(psnr_records[gt == 0]) + mean_abnormal_psnr = np.mean(psnr_records[gt == 1]) + mean = np.mean(psnr_records) + print('mean normal psrn = {}, mean abnormal psrn = {}, mean = {}'.format( + mean_normal_psnr, + mean_abnormal_psnr, + mean) + ) + mean_psnr.append(mean) + print('max mean psnr = {}'.format(np.max(mean_psnr))) + + +def calculate_score(loss_file): + if not os.path.isdir(loss_file): + loss_file_path = loss_file + else: + optical_result = compute_auc(loss_file) + loss_file_path = optical_result.loss_file + 
print('##### optimal result and model = {}'.format(optical_result)) + dataset, psnr_records, gt = load_psnr_gt(loss_file=loss_file_path) + + # the number of videos + num_videos = len(psnr_records) + + scores = np.array([], dtype=np.float32) + labels = np.array([], dtype=np.int8) + # video normalization + for i in range(num_videos): + distance = psnr_records[i] + + distance = (distance - distance.min()) / (distance.max() - distance.min()) + + scores = np.concatenate((scores, distance[DECIDABLE_IDX:]), axis=0) + labels = np.concatenate((labels, gt[i][DECIDABLE_IDX:]), axis=0) + + mean_normal_scores = np.mean(scores[labels == 0]) + mean_abnormal_scores = np.mean(scores[labels == 1]) + print('mean normal scores = {}, mean abnormal scores = {}, ' + 'delta = {}'.format(mean_normal_scores, mean_abnormal_scores, mean_normal_scores - mean_abnormal_scores)) + + +def test_func(*args): + # simulate testing on CUHK AVENUE dataset + dataset = GroundTruthLoader.AVENUE + + # load the ground truth + gt_loader = GroundTruthLoader() + gt = gt_loader(dataset=dataset) + + num_videos = len(gt) + + simulated_results = { + 'dataset': dataset, + 'psnr': [] + } + + simulated_psnr = [] + for i in range(num_videos): + sub_video_length = gt[i].shape[0] + simulated_psnr.append(np.random.random(size=sub_video_length)) + + simulated_results['psnr'] = simulated_psnr + + # writing to file, 'generated_loss.bin' + with open('generated_loss.bin', 'wb') as writer: + pickle.dump(simulated_results, writer, pickle.HIGHEST_PROTOCOL) + + print(file_path.name) + result = compute_auc(file_path.name) + + print('optimal = {}'.format(result)) + + +eval_type_function = { + 'compute_auc': compute_auc, + 'compute_eer': compute_eer, + 'precision_recall_auc': precision_recall_auc, + 'calculate_psnr': calculate_psnr, + 'calculate_score': calculate_score, + 'average_psnr': average_psnr, + 'average_psnr_sample': average_psnr +} + + +def evaluate(eval_type, save_file): + assert eval_type in eval_type_function, 'there is no type of evaluation {}, please check {}' \ + .format(eval_type, eval_type_function.keys()) + eval_func = eval_type_function[eval_type] + optimal_results = eval_func(save_file) + return optimal_results + + +if __name__ == '__main__': + args = parser_args() + + eval_type = args.type + file_path = args.file + + print('Evaluate type = {}'.format(eval_type)) + print('File path = {}'.format(file_path)) + + if eval_type == 'test_func': + test_func() + else: + evaluate(eval_type, file_path)
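The scoring logic in `compute_auc()` above can be sanity-checked without any dataset on disk. Below is a minimal, self-contained sketch (the PSNR values and frame labels are fabricated for illustration, not taken from a real loss file or ground truth) of the per-video min-max normalization, the dropping of the first `DECIDABLE_IDX` frames, and the frame-level ROC/AUC computation with normal frames as `pos_label=0`:

```
# Self-contained sketch of the compute_auc() scoring steps; all data here is fake.
import numpy as np
from sklearn import metrics

DECIDABLE_IDX = 4  # the first 4 frames of each video are ignored, as above

# three fake test videos: 200 frames each, last 50 frames marked abnormal (label 1)
psnr_records = [np.random.uniform(20.0, 40.0, size=200) for _ in range(3)]
gt = [np.concatenate([np.zeros(150, dtype=np.int8), np.ones(50, dtype=np.int8)])
      for _ in range(3)]

scores = np.array([], dtype=np.float32)
labels = np.array([], dtype=np.int8)
for distance, video_gt in zip(psnr_records, gt):
    # per-video min-max normalization of the PSNR curve
    distance = (distance - distance.min()) / (distance.max() - distance.min())
    scores = np.concatenate((scores, distance[DECIDABLE_IDX:]))
    labels = np.concatenate((labels, video_gt[DECIDABLE_IDX:]))

# high PSNR means normal, so normal frames (label 0) are the positive class here
fpr, tpr, _ = metrics.roc_curve(labels, scores, pos_label=0)
print('frame-level AUC =', metrics.auc(fpr, tpr))
```

With a real loss file produced by testing, the same metric is obtained with `python evaluate.py --file <loss_file> --type compute_auc` (or any of the other `--type` choices listed in `eval_type_function`).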
\ No newline at end of file diff --git a/Codes/flownet2/.gitignore b/Codes/flownet2/.gitignore new file mode 100644 index 0000000..31abf4e --- /dev/null +++ b/Codes/flownet2/.gitignore @@ -0,0 +1,9 @@ +__pycache__/ +*.py[cod] +*$py.class +*.o +*.so +*.so.dSYM +checkpoints/ +!checkpoints/download.sh +!checkpoints/README.md diff --git a/Codes/flownet2/LICENSE b/Codes/flownet2/LICENSE new file mode 100644 index 0000000..d2cc224 --- /dev/null +++ b/Codes/flownet2/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Sam Pepose + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Codes/flownet2/Makefile b/Codes/flownet2/Makefile new file mode 100644 index 0000000..073c011 --- /dev/null +++ b/Codes/flownet2/Makefile @@ -0,0 +1,82 @@ +# Makefile + +TF_INC = `python -c "import tensorflow; print(tensorflow.sysconfig.get_include())"` + +ifndef CUDA_HOME + CUDA_HOME := /usr/local/cuda +endif + +CC = gcc -O2 -pthread +CXX = g++ +GPUCC = nvcc +CFLAGS = -std=c++11 -I$(TF_INC) -I"$(CUDA_HOME)/include" -DGOOGLE_CUDA=1 +GPUCFLAGS = -c +LFLAGS = -pthread -shared -fPIC +GPULFLAGS = -x cu -Xcompiler -fPIC +CGPUFLAGS = -L$(CUDA_HOME)/lib -L$(CUDA_HOME)/lib64 -lcudart + +OUT_DIR = src/ops/build +PREPROCESSING_SRC = "src/ops/preprocessing/preprocessing.cc" "src/ops/preprocessing/kernels/flow_augmentation.cc" "src/ops/preprocessing/kernels/augmentation_base.cc" "src/ops/preprocessing/kernels/data_augmentation.cc" +GPU_SRC_DATA_AUG = src/ops/preprocessing/kernels/data_augmentation.cu.cc +GPU_SRC_FLOW = src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc +GPU_PROD_DATA_AUG = $(OUT_DIR)/data_augmentation.o +GPU_PROD_FLOW = $(OUT_DIR)/flow_augmentation_gpu.o +PREPROCESSING_PROD = $(OUT_DIR)/preprocessing.so + +DOWNSAMPLE_SRC = "src/ops/downsample/downsample_kernel.cc" "src/ops/downsample/downsample_op.cc" +GPU_SRC_DOWNSAMPLE = src/ops/downsample/downsample_kernel_gpu.cu.cc +GPU_PROD_DOWNSAMPLE = $(OUT_DIR)/downsample_kernel_gpu.o +DOWNSAMPLE_PROD = $(OUT_DIR)/downsample.so + +CORRELATION_SRC = "src/ops/correlation/correlation_kernel.cc" "src/ops/correlation/correlation_grad_kernel.cc" "src/ops/correlation/correlation_op.cc" +GPU_SRC_CORRELATION = src/ops/correlation/correlation_kernel.cu.cc +GPU_SRC_CORRELATION_GRAD = src/ops/correlation/correlation_grad_kernel.cu.cc +GPU_SRC_PAD = src/ops/correlation/pad.cu.cc +GPU_PROD_CORRELATION = $(OUT_DIR)/correlation_kernel_gpu.o +GPU_PROD_CORRELATION_GRAD = $(OUT_DIR)/correlation_grad_kernel_gpu.o +GPU_PROD_PAD = $(OUT_DIR)/correlation_pad_gpu.o +CORRELATION_PROD = 
$(OUT_DIR)/correlation.so + +FLOWWARP_SRC = "src/ops/flow_warp/flow_warp_op.cc" "src/ops/flow_warp/flow_warp.cc" "src/ops/flow_warp/flow_warp_grad.cc" +GPU_SRC_FLOWWARP = "src/ops/flow_warp/flow_warp.cu.cc" +GPU_SRC_FLOWWARP_GRAD = "src/ops/flow_warp/flow_warp_grad.cu.cc" +GPU_PROD_FLOWWARP = "$(OUT_DIR)/flow_warp_gpu.o" +GPU_PROD_FLOWWARP_GRAD = "$(OUT_DIR)/flow_warp_grad_gpu.o" +FLOWWARP_PROD = "$(OUT_DIR)/flow_warp.so" + +ifeq ($(OS),Windows_NT) + detected_OS := Windows +else + detected_OS := $(shell sh -c 'uname -s 2>/dev/null || echo not') +endif +ifeq ($(detected_OS),Darwin) # Mac OS X + CGPUFLAGS += -undefined dynamic_lookup +endif +ifeq ($(detected_OS),Linux) + CFLAGS += -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D__STRICT_ANSI__ -D_GLIBCXX_USE_CXX11_ABI=0 +endif + +all: preprocessing downsample correlation flowwarp + +preprocessing: + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_DATA_AUG) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_DATA_AUG) + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOW) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOW) + $(CXX) -g $(CFLAGS) $(PREPROCESSING_SRC) $(GPU_PROD_DATA_AUG) $(GPU_PROD_FLOW) $(LFLAGS) $(CGPUFLAGS) -o $(PREPROCESSING_PROD) + +downsample: + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_DOWNSAMPLE) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_DOWNSAMPLE) + $(CXX) -g $(CFLAGS) $(DOWNSAMPLE_SRC) $(GPU_PROD_DOWNSAMPLE) $(LFLAGS) $(CGPUFLAGS) -o $(DOWNSAMPLE_PROD) + +correlation: + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_CORRELATION) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_CORRELATION) + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_CORRELATION_GRAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_CORRELATION_GRAD) + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_PAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_PAD) + $(CXX) -g $(CFLAGS) $(CORRELATION_SRC) $(GPU_PROD_CORRELATION) $(GPU_PROD_CORRELATION_GRAD) $(GPU_PROD_PAD) $(LFLAGS) $(CGPUFLAGS) -o $(CORRELATION_PROD) + +flowwarp: + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOWWARP) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOWWARP) + $(GPUCC) -g $(CFLAGS) $(GPUCFLAGS) $(GPU_SRC_FLOWWARP_GRAD) $(GPULFLAGS) $(GPUDEF) -o $(GPU_PROD_FLOWWARP_GRAD) + $(CXX) -g $(CFLAGS) $(FLOWWARP_SRC) $(GPU_PROD_FLOWWARP) $(GPU_PROD_FLOWWARP_GRAD) $(LFLAGS) $(CGPUFLAGS) -o $(FLOWWARP_PROD) + +clean: + rm -f $(PREPROCESSING_PROD) $(GPU_PROD_FLOW) $(GPU_PROD_DATA_AUG) $(DOWNSAMPLE_PROD) $(GPU_PROD_DOWNSAMPLE) diff --git a/Codes/flownet2/README.md b/Codes/flownet2/README.md new file mode 100644 index 0000000..8647723 --- /dev/null +++ b/Codes/flownet2/README.md @@ -0,0 +1,66 @@ +## FlowNet2 (TensorFlow) + +This repo contains FlowNet2[1] for TensorFlow. It includes FlowNetC, S, CS, CSS, CSS-ft-sd, SD, and 2. + +### Installation +``` +pip install enum +pip install pypng +pip install matplotlib +pip install image +pip install scipy +pip install numpy +pip install tensorflow +``` + +Linux: +`sudo apt-get install python-tk` + +You must have CUDA installed: +`make all` + +### Download weights +To download the weights for all models (4.4GB), run the `download.sh` script in the `checkpoints` directory. All test scripts rely on these checkpoints to work properly. 
+ + +### Flow Generation (1 image pair) + +``` +python -m src.flownet2.test --input_a data/samples/0img0.ppm --input_b data/samples/0img1.ppm --out ./ +``` + +Available models: +* `flownet2` +* `flownet_s` +* `flownet_c` +* `flownet_cs` +* `flownet_css` (can edit test.py to use css-ft-sd weights) +* `flownet_sd` + +If installation is successful, you should predict the following flow from samples/0img0.ppm: + + +### Training +If you would like to train any of the networks from scratch (replace `flownet2` with the appropriate model): +``` +python -m src.flownet2.train +``` +For stacked networks, previous network weights will be loaded and fixed. For example, if training CS, the C weights are loaded and fixed and the S weights are randomly initialized. + + +### Fine-tuning +TODO + +### Benchmarks +Benchmarks are for a forward pass with each model of two 512x384 images. All benchmarks were tested with a K80 GPU and Intel Xeon CPU E5-2682 v4 @ 2.30GHz. Code was executed with TensorFlow-1.2.1 and python 2.7.12 on Ubuntu 16.04. Resulting times were averaged over 10 runs. The first run is always slower as it sets up the Tensorflow Session. + +| | S | C | CS | CSS | SD | 2 +| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | +| First Run | 681.039ms | 898.792ms | 998.584ms | 1063.357ms | 933.806ms | 1882.003ms | +| Subsequent Runs | 38.067ms | 78.789ms | 123.300ms | 161.186ms | 62.061ms | 276.641ms | + + +### Sources +[1] E. Ilg, N. Mayer, T. Saikia, M. Keuper, A. Dosovitskiy, T. Brox +FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks, +IEEE Conference in Computer Vision and Pattern Recognition (CVPR), 2017. diff --git a/Codes/flownet2/__init__.py b/Codes/flownet2/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/__init__.py diff --git a/Codes/flownet2/corr.py b/Codes/flownet2/corr.py new file mode 100644 index 0000000..3301d8c --- /dev/null +++ b/Codes/flownet2/corr.py @@ -0,0 +1,45 @@ +import tensorflow as tf +import numpy as np +import math + +BATCH_SIZE = 8 +HEIGHT = 30 +WIDTH = 60 +CHANNELS = 3 + +NEIGHBORHOOD_SIZE = 41 +MAX_DISPLACEMENT = int(math.ceil(NEIGHBORHOOD_SIZE / 2.0)) +STRIDE_2 = 2 + +assert(STRIDE_2 <= NEIGHBORHOOD_SIZE) + +# Define two feature maps +fmA = tf.ones((BATCH_SIZE, HEIGHT, WIDTH, CHANNELS), dtype=tf.int32) +fmB = tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE, HEIGHT, WIDTH, CHANNELS)), dtype=tf.int32) + +depth = int(math.floor((2.0 * MAX_DISPLACEMENT + 1) / STRIDE_2) ** 2) + +print('Output should be size:', (BATCH_SIZE, HEIGHT, WIDTH, depth)) +print('Striding at values: ', [e for e in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2)]) + +def main(): + out = [] + for i in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2): # height + for j in range(-MAX_DISPLACEMENT + 1, MAX_DISPLACEMENT, STRIDE_2): # width + padded_a = tf.pad(fmA, [[0,0], [0, abs(i)], [0, abs(j)], [0, 0]]) + padded_b = tf.pad(fmB, [[0, 0], [abs(i), 0], [abs(j), 0], [0, 0]]) + m = padded_a * padded_b + + height_start_idx = 0 if i <= 0 else i + height_end_idx = height_start_idx + HEIGHT + width_start_idx = 0 if j <= 0 else j + width_end_idx = width_start_idx + WIDTH + cut = m[:, height_start_idx:height_end_idx, width_start_idx:width_end_idx, :] + + final = tf.reduce_sum(cut, 3) + out.append(final) + corr = tf.stack(out, 3) + print('Output size: ', corr.shape) + + +main() diff --git a/Codes/flownet2/src/__init__.py b/Codes/flownet2/src/__init__.py new file 
mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/__init__.py diff --git a/Codes/flownet2/src/correlation.py b/Codes/flownet2/src/correlation.py new file mode 100644 index 0000000..60a5c37 --- /dev/null +++ b/Codes/flownet2/src/correlation.py @@ -0,0 +1,35 @@ +import tensorflow as tf + +_correlation_ops = tf.load_op_library( + tf.resource_loader.get_path_to_datafile("./ops/build/correlation.so")) + + +def correlation(input_a, input_b, kernel_size, max_displacement, stride_1, stride_2, padding): + return _correlation_ops.correlation(input_a, + input_b, + kernel_size, + max_displacement, + stride_1, + stride_2, + padding) + + +@tf.RegisterGradient("Correlation") +def _correlation_grad(corr_op, gradients): + kernel_size = corr_op.get_attr("kernel_size") + max_displacement = corr_op.get_attr("max_displacement") + stride_1 = corr_op.get_attr("stride_1") + stride_2 = corr_op.get_attr("stride_2") + pad = corr_op.get_attr("pad") + + corr_grads = _correlation_ops.correlation_grad(gradients, + corr_op.inputs[0], + corr_op.inputs[1], + kernel_size, + max_displacement, + stride_1, + stride_2, + pad) + + # Return the gradients with respect to input_a and input_b + return corr_grads.backprops_a, corr_grads.backprops_b diff --git a/Codes/flownet2/src/dataloader.py b/Codes/flownet2/src/dataloader.py new file mode 100644 index 0000000..22a6ddb --- /dev/null +++ b/Codes/flownet2/src/dataloader.py @@ -0,0 +1,329 @@ +# -*- coding: utf-8 -*- +import tensorflow as tf +import copy +slim = tf.contrib.slim + +_preprocessing_ops = tf.load_op_library( + tf.resource_loader.get_path_to_datafile("./ops/build/preprocessing.so")) + + +# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +class Image(slim.tfexample_decoder.ItemHandler): + """An ItemHandler that decodes a parsed Tensor as an image.""" + + def __init__(self, + image_key=None, + format_key=None, + shape=None, + channels=3, + dtype=tf.uint8, + repeated=False): + """Initializes the image. + Args: + image_key: the name of the TF-Example feature in which the encoded image + is stored. + shape: the output shape of the image as 1-D `Tensor` + [height, width, channels]. If provided, the image is reshaped + accordingly. If left as None, no reshaping is done. A shape should + be supplied only if all the stored images have the same shape. + channels: the number of channels in the image. + dtype: images will be decoded at this bit depth. Different formats + support different bit depths. + See tf.image.decode_image, + tf.decode_raw, + repeated: if False, decodes a single image. If True, decodes a + variable number of image strings from a 1D tensor of strings. + """ + if not image_key: + image_key = 'image/encoded' + + super(Image, self).__init__([image_key]) + self._image_key = image_key + self._shape = shape + self._channels = channels + self._dtype = dtype + self._repeated = repeated + + def tensors_to_item(self, keys_to_tensors): + """See base class.""" + image_buffer = keys_to_tensors[self._image_key] + + if self._repeated: + return functional_ops.map_fn(lambda x: self._decode(x), + image_buffer, dtype=self._dtype) + else: + return self._decode(image_buffer) + + def _decode(self, image_buffer): + """Decodes the image buffer. + Args: + image_buffer: The tensor representing the encoded image tensor. + Returns: + A tensor that represents decoded image of self._shape, or + (?, ?, self._channels) if self._shape is not specified. 
+ """ + def decode_raw(): + """Decodes a raw image.""" + return tf.decode_raw(image_buffer, out_type=self._dtype) + + image = decode_raw() + # image.set_shape([None, None, self._channels]) + if self._shape is not None: + image = tf.reshape(image, self._shape) + + return image + + +def __get_dataset(dataset_config, split_name): + """ + dataset_config: A dataset_config defined in datasets.py + split_name: 'train'/'validate' + """ + with tf.name_scope('__get_dataset'): + if split_name not in dataset_config['SIZES']: + raise ValueError('split name %s not recognized' % split_name) + + IMAGE_HEIGHT, IMAGE_WIDTH = dataset_config['IMAGE_HEIGHT'], dataset_config['IMAGE_WIDTH'] + reader = tf.TFRecordReader + keys_to_features = { + 'image_a': tf.FixedLenFeature((), tf.string), + 'image_b': tf.FixedLenFeature((), tf.string), + 'flow': tf.FixedLenFeature((), tf.string), + } + items_to_handlers = { + 'image_a': Image( + image_key='image_a', + dtype=tf.float64, + shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3], + channels=3), + 'image_b': Image( + image_key='image_b', + dtype=tf.float64, + shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3], + channels=3), + 'flow': Image( + image_key='flow', + dtype=tf.float32, + shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 2], + channels=2), + } + decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) + return slim.dataset.Dataset( + data_sources=dataset_config['PATHS'][split_name], + reader=reader, + decoder=decoder, + num_samples=dataset_config['SIZES'][split_name], + items_to_descriptions=dataset_config['ITEMS_TO_DESCRIPTIONS']) + + +def config_to_arrays(dataset_config): + output = { + 'name': [], + 'rand_type': [], + 'exp': [], + 'mean': [], + 'spread': [], + 'prob': [], + 'coeff_schedule': [], + } + config = copy.deepcopy(dataset_config) + + if 'coeff_schedule_param' in config: + del config['coeff_schedule_param'] + + # Get all attributes + for (name, value) in config.iteritems(): + if name == 'coeff_schedule_param': + output['coeff_schedule'] = [value['half_life'], + value['initial_coeff'], + value['final_coeff']] + else: + output['name'].append(name) + output['rand_type'].append(value['rand_type']) + output['exp'].append(value['exp']) + output['mean'].append(value['mean']) + output['spread'].append(value['spread']) + output['prob'].append(value['prob']) + + return output + + +# https://github.com/tgebru/transform/blob/master/src/caffe/layers/data_augmentation_layer.cpp#L34 +def _generate_coeff(param, discount_coeff=tf.constant(1.0), default_value=tf.constant(0.0)): + if not all(name in param for name in ['rand_type', 'exp', 'mean', 'spread', 'prob']): + raise RuntimeError('Expected rand_type, exp, mean, spread, prob in `param`') + + rand_type = param['rand_type'] + exp = float(param['exp']) + mean = tf.convert_to_tensor(param['mean'], dtype=tf.float32) + spread = float(param['spread']) # AKA standard deviation + prob = float(param['prob']) + + # Multiply spread by our discount_coeff so it changes over time + spread = spread * discount_coeff + + if rand_type == 'uniform': + value = tf.cond(spread > 0.0, + lambda: tf.random_uniform([], mean - spread, mean + spread), + lambda: mean) + if exp: + value = tf.exp(value) + elif rand_type == 'gaussian': + value = tf.cond(spread > 0.0, + lambda: tf.random_normal([], mean, spread), + lambda: mean) + if exp: + value = tf.exp(value) + elif rand_type == 'bernoulli': + if prob > 0.0: + value = tf.contrib.distributions.Bernoulli(probs=prob).sample([]) + else: + value = 0.0 + elif rand_type == 'uniform_bernoulli': + tmp1 = 0.0 + tmp2 
= 0 + if prob > 0.0: + tmp2 = tf.contrib.distributions.Bernoulli(probs=prob).sample([]) + else: + tmp2 = 0 + + if tmp2 == 0: + if default_value is not None: + return default_value + else: + tmp1 = tf.cond(spread > 0.0, + lambda: tf.random_uniform([], mean - spread, mean + spread), + lambda: mean) + if exp: + tmp1 = tf.exp(tmp1) + value = tmp1 + elif rand_type == 'gaussian_bernoulli': + tmp1 = 0.0 + tmp2 = 0 + if prob > 0.0: + tmp2 = tf.contrib.distributions.Bernoulli(probs=prob).sample([]) + else: + tmp2 = 0 + + if tmp2 == 0: + if default_value is not None: + return default_value + else: + tmp1 = tf.cond(spread > 0.0, + lambda: tf.random_normal([], mean, spread), + lambda: mean) + if exp: + tmp1 = tf.exp(tmp1) + value = tmp1 + else: + raise ValueError('Unknown distribution type %s.' % rand_type) + return value + + +def load_batch(dataset_config, split_name, global_step): + num_threads = 32 + reader_kwargs = {'options': tf.python_io.TFRecordOptions( + tf.python_io.TFRecordCompressionType.ZLIB)} + + with tf.name_scope('load_batch'): + dataset = __get_dataset(dataset_config, split_name) + data_provider = slim.dataset_data_provider.DatasetDataProvider( + dataset, + num_readers=num_threads, + common_queue_capacity=2048, + common_queue_min=1024, + reader_kwargs=reader_kwargs) + image_a, image_b, flow = data_provider.get(['image_a', 'image_b', 'flow']) + image_a, image_b, flow = map(tf.to_float, [image_a, image_b, flow]) + + if dataset_config['PREPROCESS']['scale']: + image_a = image_a / 255.0 + image_b = image_b / 255.0 + + crop = [dataset_config['PREPROCESS']['crop_height'], + dataset_config['PREPROCESS']['crop_width']] + config_a = config_to_arrays(dataset_config['PREPROCESS']['image_a']) + config_b = config_to_arrays(dataset_config['PREPROCESS']['image_b']) + + image_as, image_bs, flows = map(lambda x: tf.expand_dims(x, 0), [image_a, image_b, flow]) + + # Perform data augmentation on GPU + with tf.device('/cpu:0'): + image_as, image_bs, transforms_from_a, transforms_from_b = \ + _preprocessing_ops.data_augmentation(image_as, + image_bs, + global_step, + crop, + config_a['name'], + config_a['rand_type'], + config_a['exp'], + config_a['mean'], + config_a['spread'], + config_a['prob'], + config_a['coeff_schedule'], + config_b['name'], + config_b['rand_type'], + config_b['exp'], + config_b['mean'], + config_b['spread'], + config_b['prob'], + config_b['coeff_schedule']) + + noise_coeff_a = None + noise_coeff_b = None + + # Generate and apply noise coeff for A if defined in A params + if 'noise' in dataset_config['PREPROCESS']['image_a']: + discount_coeff = tf.constant(1.0) + if 'coeff_schedule_param' in dataset_config['PREPROCESS']['image_a']: + initial_coeff = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['initial_coeff'] + final_coeff = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['final_coeff'] + half_life = dataset_config['PREPROCESS']['image_a']['coeff_schedule_param']['half_life'] + discount_coeff = initial_coeff + \ + (final_coeff - initial_coeff) * \ + (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0) + + noise_coeff_a = _generate_coeff( + dataset_config['PREPROCESS']['image_a']['noise'], discount_coeff) + noise_a = tf.random_normal(shape=tf.shape(image_as), + mean=0.0, stddev=noise_coeff_a, + dtype=tf.float32) + image_as = tf.clip_by_value(image_as + noise_a, 0.0, 1.0) + + # Generate noise coeff for B if defined in B params + if 'noise' in dataset_config['PREPROCESS']['image_b']: + discount_coeff = tf.constant(1.0) + if 
'coeff_schedule_param' in dataset_config['PREPROCESS']['image_b']: + initial_coeff = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['initial_coeff'] + final_coeff = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['final_coeff'] + half_life = dataset_config['PREPROCESS']['image_b']['coeff_schedule_param']['half_life'] + discount_coeff = initial_coeff + \ + (final_coeff - initial_coeff) * \ + (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0) + noise_coeff_b = _generate_coeff( + dataset_config['PREPROCESS']['image_b']['noise'], discount_coeff) + + # Combine coeff from a with coeff from b + if noise_coeff_a is not None: + if noise_coeff_b is not None: + noise_coeff_b = noise_coeff_a * noise_coeff_b + else: + noise_coeff_b = noise_coeff_a + + # Add noise to B if needed + if noise_coeff_b is not None: + noise_b = tf.random_normal(shape=tf.shape(image_bs), + mean=0.0, stddev=noise_coeff_b, + dtype=tf.float32) + image_bs = tf.clip_by_value(image_bs + noise_b, 0.0, 1.0) + + # Perform flow augmentation using spatial parameters from data augmentation + flows = _preprocessing_ops.flow_augmentation( + flows, transforms_from_a, transforms_from_b, crop) + + return tf.train.batch([image_as, image_bs, flows], + enqueue_many=True, + batch_size=dataset_config['BATCH_SIZE'], + capacity=dataset_config['BATCH_SIZE'] * 4, + num_threads=num_threads, + allow_smaller_final_batch=False) diff --git a/Codes/flownet2/src/dataset_configs.py b/Codes/flownet2/src/dataset_configs.py new file mode 100644 index 0000000..fbda5d0 --- /dev/null +++ b/Codes/flownet2/src/dataset_configs.py @@ -0,0 +1,153 @@ +""" +Add dataset configurations here. Each dataset must have the following structure: + +NAME = { + IMAGE_HEIGHT: int, + IMAGE_WIDTH: int, + ITEMS_TO_DESCRIPTIONS: { + 'image_a': 'A 3-channel image.', + 'image_b': 'A 3-channel image.', + 'flow': 'A 2-channel optical flow field', + }, + SIZES: { + 'train': int, + 'validate': int, (optional) + ... + }, + BATCH_SIZE: int, + PATHS: { + 'train': '', + 'validate': '', (optional) + ... 
+ } +} +""" + +""" +note that one step = one batch of data processed, ~not~ an entire epoch +'coeff_schedule_param': { + 'half_life': 50000, after this many steps, the value will be i + (f - i)/2 + 'initial_coeff': 0.5, initial value + 'final_coeff': 1, final value +}, +""" + +FLYING_CHAIRS_DATASET_CONFIG = { + 'IMAGE_HEIGHT': 384, + 'IMAGE_WIDTH': 512, + 'ITEMS_TO_DESCRIPTIONS': { + 'image_a': 'A 3-channel image.', + 'image_b': 'A 3-channel image.', + 'flow': 'A 2-channel optical flow field', + }, + 'SIZES': { + 'train': 22232, + 'validate': 640, + 'sample': 8, + }, + 'BATCH_SIZE': 8, + 'PATHS': { + 'train': './data/tfrecords/fc_train.tfrecords', + 'validate': './data/tfrecords/fc_val.tfrecords', + 'sample': './data/tfrecords/fc_sample.tfrecords', + }, + 'PREPROCESS': { + 'scale': False, + 'crop_height': 320, + 'crop_width': 448, + 'image_a': { + 'translate': { + 'rand_type': "uniform_bernoulli", + 'exp': False, + 'mean': 0, + 'spread': 0.4, + 'prob': 1.0, + }, + 'rotate': { + 'rand_type': "uniform_bernoulli", + 'exp': False, + 'mean': 0, + 'spread': 0.4, + 'prob': 1.0, + }, + 'zoom': { + 'rand_type': "uniform_bernoulli", + 'exp': True, + 'mean': 0.2, + 'spread': 0.4, + 'prob': 1.0, + }, + 'squeeze': { + 'rand_type': "uniform_bernoulli", + 'exp': True, + 'mean': 0, + 'spread': 0.3, + 'prob': 1.0, + }, + 'noise': { + 'rand_type': "uniform_bernoulli", + 'exp': False, + 'mean': 0.03, + 'spread': 0.03, + 'prob': 1.0, + }, + }, + # All preprocessing to image A will be applied to image B in addition to the following. + 'image_b': { + 'translate': { + 'rand_type': "gaussian_bernoulli", + 'exp': False, + 'mean': 0, + 'spread': 0.03, + 'prob': 1.0, + }, + 'rotate': { + 'rand_type': "gaussian_bernoulli", + 'exp': False, + 'mean': 0, + 'spread': 0.03, + 'prob': 1.0, + }, + 'zoom': { + 'rand_type': "gaussian_bernoulli", + 'exp': True, + 'mean': 0, + 'spread': 0.03, + 'prob': 1.0, + }, + 'gamma': { + 'rand_type': "gaussian_bernoulli", + 'exp': True, + 'mean': 0, + 'spread': 0.02, + 'prob': 1.0, + }, + 'brightness': { + 'rand_type': "gaussian_bernoulli", + 'exp': False, + 'mean': 0, + 'spread': 0.02, + 'prob': 1.0, + }, + 'contrast': { + 'rand_type': "gaussian_bernoulli", + 'exp': True, + 'mean': 0, + 'spread': 0.02, + 'prob': 1.0, + }, + 'color': { + 'rand_type': "gaussian_bernoulli", + 'exp': True, + 'mean': 0, + 'spread': 0.02, + 'prob': 1.0, + }, + 'coeff_schedule_param': { + 'half_life': 50000, + 'initial_coeff': 0.5, + 'final_coeff': 1, + }, + } + }, +} diff --git a/Codes/flownet2/src/downsample.py b/Codes/flownet2/src/downsample.py new file mode 100644 index 0000000..5e6fc95 --- /dev/null +++ b/Codes/flownet2/src/downsample.py @@ -0,0 +1,8 @@ +import tensorflow as tf + +_downsample = tf.load_op_library( + tf.resource_loader.get_path_to_datafile("./ops/build/downsample.so")) + + +def downsample(tensor, size): + return _downsample.downsample(tensor, size) diff --git a/Codes/flownet2/src/flow_warp.py b/Codes/flownet2/src/flow_warp.py new file mode 100644 index 0000000..fe5fd4d --- /dev/null +++ b/Codes/flownet2/src/flow_warp.py @@ -0,0 +1,15 @@ +import tensorflow as tf + +_flow_warp_ops = tf.load_op_library( + tf.resource_loader.get_path_to_datafile("./ops/build/flow_warp.so")) + + +def flow_warp(image, flow): + return _flow_warp_ops.flow_warp(image, flow) + + +@tf.RegisterGradient("FlowWarp") +def _flow_warp_grad(flow_warp_op, gradients): + return _flow_warp_ops.flow_warp_grad(flow_warp_op.inputs[0], + flow_warp_op.inputs[1], + gradients) diff --git a/Codes/flownet2/src/flowlib.py 
b/Codes/flownet2/src/flowlib.py new file mode 100644 index 0000000..36c56d4 --- /dev/null +++ b/Codes/flownet2/src/flowlib.py @@ -0,0 +1,554 @@ +#!/usr/bin/python +""" +# ============================== +# flowlib.py +# library for optical flow processing +# Author: Ruoteng Li +# Date: 6th Aug 2016 +# ============================== +""" +import png +import numpy as np +import matplotlib.colors as cl +import matplotlib.pyplot as plt +from PIL import Image +import tensorflow as tf + + +UNKNOWN_FLOW_THRESH = 1e7 +SMALLFLOW = 0.0 +LARGEFLOW = 1e8 + +""" +============= +Flow Section +============= +""" + + +def show_flow(filename): + """ + visualize optical flow map using matplotlib + :param filename: optical flow file + :return: None + """ + flow = read_flow(filename) + img = flow_to_image(flow) + plt.imshow(img) + plt.show() + + +def visualize_flow(flow, mode='Y'): + """ + this function visualize the input flow + :param flow: input flow in array + :param mode: choose which color mode to visualize the flow (Y: Ccbcr, RGB: RGB color) + :return: None + """ + if mode == 'Y': + # Ccbcr color wheel + img = flow_to_image(flow) + plt.imshow(img) + plt.show() + elif mode == 'RGB': + (h, w) = flow.shape[0:2] + du = flow[:, :, 0] + dv = flow[:, :, 1] + valid = flow[:, :, 2] + max_flow = max(np.max(du), np.max(dv)) + img = np.zeros((h, w, 3), dtype=np.float64) + # angle layer + img[:, :, 0] = np.arctan2(dv, du) / (2 * np.pi) + # magnitude layer, normalized to 1 + img[:, :, 1] = np.sqrt(du * du + dv * dv) * 8 / max_flow + # phase layer + img[:, :, 2] = 8 - img[:, :, 1] + # clip to [0,1] + small_idx = img[:, :, 0:3] < 0 + large_idx = img[:, :, 0:3] > 1 + img[small_idx] = 0 + img[large_idx] = 1 + # convert to rgb + img = cl.hsv_to_rgb(img) + # remove invalid point + img[:, :, 0] = img[:, :, 0] * valid + img[:, :, 1] = img[:, :, 1] * valid + img[:, :, 2] = img[:, :, 2] * valid + # show + plt.imshow(img) + plt.show() + + return None + + +def read_flow(filename): + """ + read optical flow from Middlebury .flo file + :param filename: name of the flow file + :return: optical flow data in matrix + """ + f = open(filename, 'rb') + magic = np.fromfile(f, np.float32, count=1) + data2d = None + + if 202021.25 != magic: + print('Magic number incorrect. 
Invalid .flo file') + else: + w = np.fromfile(f, np.int32, count=1) + h = np.fromfile(f, np.int32, count=1) + print("Reading %d x %d flo file" % (h, w)) + data2d = np.fromfile(f, np.float32, count=2 * w * h) + # reshape data into 3D array (columns, rows, channels) + data2d = np.resize(data2d, (h[0], w[0], 2)) + f.close() + return data2d + + +def read_flow_png(flow_file): + """ + Read optical flow from KITTI .png file + :param flow_file: name of the flow file + :return: optical flow data in matrix + """ + flow_object = png.Reader(filename=flow_file) + flow_direct = flow_object.asDirect() + flow_data = list(flow_direct[2]) + (w, h) = flow_direct[3]['size'] + flow = np.zeros((h, w, 3), dtype=np.float64) + for i in range(len(flow_data)): + flow[i, :, 0] = flow_data[i][0::3] + flow[i, :, 1] = flow_data[i][1::3] + flow[i, :, 2] = flow_data[i][2::3] + + invalid_idx = (flow[:, :, 2] == 0) + flow[:, :, 0:2] = (flow[:, :, 0:2] - 2 ** 15) / 64.0 + flow[invalid_idx, 0] = 0 + flow[invalid_idx, 1] = 0 + return flow + + +def write_flow(flow, filename): + """ + write optical flow in Middlebury .flo format + :param flow: optical flow map + :param filename: optical flow file path to be saved + :return: None + """ + f = open(filename, 'wb') + magic = np.array([202021.25], dtype=np.float32) + (height, width) = flow.shape[0:2] + w = np.array([width], dtype=np.int32) + h = np.array([height], dtype=np.int32) + magic.tofile(f) + w.tofile(f) + h.tofile(f) + flow.tofile(f) + f.close() + + +def segment_flow(flow): + h = flow.shape[0] + w = flow.shape[1] + u = flow[:, :, 0] + v = flow[:, :, 1] + + idx = ((abs(u) > LARGEFLOW) | (abs(v) > LARGEFLOW)) + idx2 = (abs(u) == SMALLFLOW) + class0 = (v == 0) & (u == 0) + u[idx2] = 0.00001 + tan_value = v / u + + class1 = (tan_value < 1) & (tan_value >= 0) & (u > 0) & (v >= 0) + class2 = (tan_value >= 1) & (u >= 0) & (v >= 0) + class3 = (tan_value < -1) & (u <= 0) & (v >= 0) + class4 = (tan_value < 0) & (tan_value >= -1) & (u < 0) & (v >= 0) + class8 = (tan_value >= -1) & (tan_value < 0) & (u > 0) & (v <= 0) + class7 = (tan_value < -1) & (u >= 0) & (v <= 0) + class6 = (tan_value >= 1) & (u <= 0) & (v <= 0) + class5 = (tan_value >= 0) & (tan_value < 1) & (u < 0) & (v <= 0) + + seg = np.zeros((h, w)) + + seg[class1] = 1 + seg[class2] = 2 + seg[class3] = 3 + seg[class4] = 4 + seg[class5] = 5 + seg[class6] = 6 + seg[class7] = 7 + seg[class8] = 8 + seg[class0] = 0 + seg[idx] = 0 + + return seg + + +def flow_error(tu, tv, u, v): + """ + Calculate average end point error + :param tu: ground-truth horizontal flow map + :param tv: ground-truth vertical flow map + :param u: estimated horizontal flow map + :param v: estimated vertical flow map + :return: End point error of the estimated flow + """ + smallflow = 0.0 + ''' + stu = tu[bord+1:end-bord,bord+1:end-bord] + stv = tv[bord+1:end-bord,bord+1:end-bord] + su = u[bord+1:end-bord,bord+1:end-bord] + sv = v[bord+1:end-bord,bord+1:end-bord] + ''' + stu = tu[:] + stv = tv[:] + su = u[:] + sv = v[:] + + idxUnknow = (abs(stu) > UNKNOWN_FLOW_THRESH) | (abs(stv) > UNKNOWN_FLOW_THRESH) + stu[idxUnknow] = 0 + stv[idxUnknow] = 0 + su[idxUnknow] = 0 + sv[idxUnknow] = 0 + + ind2 = [(np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)] + index_su = su[ind2] + index_sv = sv[ind2] + an = 1.0 / np.sqrt(index_su ** 2 + index_sv ** 2 + 1) + un = index_su * an + vn = index_sv * an + + index_stu = stu[ind2] + index_stv = stv[ind2] + tn = 1.0 / np.sqrt(index_stu ** 2 + index_stv ** 2 + 1) + tun = index_stu * tn + tvn = index_stv * tn + + ''' + 
angle = un * tun + vn * tvn + (an * tn) + index = [angle == 1.0] + angle[index] = 0.999 + ang = np.arccos(angle) + mang = np.mean(ang) + mang = mang * 180 / np.pi + ''' + + epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2) + epe = epe[ind2] + mepe = np.mean(epe) + return mepe + + +def flow_to_image(flow): + """ + Convert flow into middlebury color code image + :param flow: optical flow map + :return: optical flow image in middlebury color + """ + u = flow[:, :, 0] + v = flow[:, :, 1] + + maxu = -999. + maxv = -999. + minu = 999. + minv = 999. + + idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH) + u[idxUnknow] = 0 + v[idxUnknow] = 0 + + maxu = max(maxu, np.max(u)) + minu = min(minu, np.min(u)) + + maxv = max(maxv, np.max(v)) + minv = min(minv, np.min(v)) + + rad = np.sqrt(u ** 2 + v ** 2) + maxrad = max(-1, np.max(rad)) + + # print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu,maxu, minv, maxv)) + + u = u/(maxrad + np.finfo(float).eps) + v = v/(maxrad + np.finfo(float).eps) + + img = compute_color(u, v) + + idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) + img[idx] = 0 + + return np.uint8(img) + + +def tf_flow_to_image(flow): + """ + Convert flow into middlebury color code image + :param flow: optical flow map + :return: optical flow image in middlebury color + """ + u = flow[:, :, :, 0] + v = flow[:, :, :, 1] + + maxu = tf.constant(-999.) + maxv = tf.constant(-999.) + minu = tf.constant(999.) + minv = tf.constant(999.) + + zeros = tf.zeros_like(u, dtype=tf.float32) + u = tf.where(tf.greater(u, UNKNOWN_FLOW_THRESH), zeros, u) + v = tf.where(tf.greater(v, UNKNOWN_FLOW_THRESH), zeros, v) + + rad = tf.sqrt(u ** 2 + v ** 2) + maxrad = tf.reduce_max(-1, tf.reduce_max(rad)) + + # print("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. 
%.3f" % (maxrad, minu, maxu, minv, maxv)) + + u = u / (maxrad + np.finfo(float).eps) + v = v / (maxrad + np.finfo(float).eps) + + img = compute_color(u, v) + + # idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2) + # img[idx] = 0 + + return np.uint8(img) + + +def evaluate_flow_file(gt, pred): + """ + evaluate the estimated optical flow end point error according to ground truth provided + :param gt: ground truth file path + :param pred: estimated optical flow file path + :return: end point error, float32 + """ + # Read flow files and calculate the errors + gt_flow = read_flow(gt) # ground truth flow + eva_flow = read_flow(pred) # predicted flow + # Calculate errors + average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], eva_flow[:, :, 0], eva_flow[:, :, 1]) + return average_pe + + +def evaluate_flow(gt_flow, pred_flow): + """ + gt: ground-truth flow + pred: estimated flow + """ + average_pe = flow_error(gt_flow[:, :, 0], gt_flow[:, :, 1], pred_flow[:, :, 0], pred_flow[:, :, 1]) + return average_pe + + +""" +============== +Disparity Section +============== +""" + + +def read_disp_png(file_name): + """ + Read optical flow from KITTI .png file + :param file_name: name of the flow file + :return: optical flow data in matrix + """ + image_object = png.Reader(filename=file_name) + image_direct = image_object.asDirect() + image_data = list(image_direct[2]) + (w, h) = image_direct[3]['size'] + channel = len(image_data[0]) / w + flow = np.zeros((h, w, channel), dtype=np.uint16) + for i in range(len(image_data)): + for j in range(channel): + flow[i, :, j] = image_data[i][j::channel] + return flow[:, :, 0] / 256 + + +def disp_to_flowfile(disp, filename): + """ + Read KITTI disparity file in png format + :param disp: disparity matrix + :param filename: the flow file name to save + :return: None + """ + f = open(filename, 'wb') + magic = np.array([202021.25], dtype=np.float32) + (height, width) = disp.shape[0:2] + w = np.array([width], dtype=np.int32) + h = np.array([height], dtype=np.int32) + empty_map = np.zeros((height, width), dtype=np.float32) + data = np.dstack((disp, empty_map)) + magic.tofile(f) + w.tofile(f) + h.tofile(f) + data.tofile(f) + f.close() + + +""" +============== +Image Section +============== +""" + + +def read_image(filename): + """ + Read normal image of any format + :param filename: name of the image file + :return: image data in matrix uint8 type + """ + img = Image.open(filename) + im = np.array(img) + return im + + +def warp_image(im, flow): + """ + Use optical flow to warp image to the next + :param im: image to warp + :param flow: optical flow + :return: warped image + """ + from scipy import interpolate + image_height = im.shape[0] + image_width = im.shape[1] + flow_height = flow.shape[0] + flow_width = flow.shape[1] + n = image_height * image_width + (iy, ix) = np.mgrid[0:image_height, 0:image_width] + (fy, fx) = np.mgrid[0:flow_height, 0:flow_width] + fx += flow[:,:,0] + fy += flow[:,:,1] + mask = np.logical_or(fx <0 , fx > flow_width) + mask = np.logical_or(mask, fy < 0) + mask = np.logical_or(mask, fy > flow_height) + fx = np.minimum(np.maximum(fx, 0), flow_width) + fy = np.minimum(np.maximum(fy, 0), flow_height) + points = np.concatenate((ix.reshape(n,1), iy.reshape(n,1)), axis=1) + xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n,1)), axis=1) + warp = np.zeros((image_height, image_width, im.shape[2])) + for i in range(im.shape[2]): + channel = im[:, :, i] + plt.imshow(channel, cmap='gray') + values = channel.reshape(n, 1) + new_channel = 
interpolate.griddata(points, values, xi, method='cubic') + new_channel = np.reshape(new_channel, [flow_height, flow_width]) + new_channel[mask] = 1 + warp[:, :, i] = new_channel.astype(np.uint8) + + return warp.astype(np.uint8) + + +""" +============== +Others +============== +""" + + +def scale_image(image, new_range): + """ + Linearly scale the image into desired range + :param image: input image + :param new_range: the new range to be aligned + :return: image normalized in new range + """ + min_val = np.min(image).astype(np.float32) + max_val = np.max(image).astype(np.float32) + min_val_new = np.array(min(new_range), dtype=np.float32) + max_val_new = np.array(max(new_range), dtype=np.float32) + scaled_image = (image - min_val) / (max_val - min_val) * (max_val_new - min_val_new) + min_val_new + return scaled_image.astype(np.uint8) + + +def compute_color(u, v): + """ + compute optical flow color map + :param u: optical flow horizontal map + :param v: optical flow vertical map + :return: optical flow in color code + """ + [h, w] = u.shape + img = np.zeros([h, w, 3]) + nanIdx = np.isnan(u) | np.isnan(v) + u[nanIdx] = 0 + v[nanIdx] = 0 + + colorwheel = make_color_wheel() + # ncols = np.size(colorwheel, 0) + ncols = colorwheel.shape[0] + + rad = np.sqrt(u**2+v**2) + + a = np.arctan2(-v, -u) / np.pi + + fk = (a+1) / 2 * (ncols - 1) + 1 + + k0 = np.floor(fk).astype(int) + + k1 = k0 + 1 + k1[k1 == ncols+1] = 1 + f = fk - k0 + + for i in range(0, np.size(colorwheel, 1)): + tmp = colorwheel[:, i] + col0 = tmp[k0-1] / 255 + col1 = tmp[k1-1] / 255 + col = (1-f) * col0 + f * col1 + + idx = rad <= 1 + col[idx] = 1-rad[idx]*(1-col[idx]) + notidx = np.logical_not(idx) + + col[notidx] *= 0.75 + img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx))) + + return img + + +def make_color_wheel(): + """ + Generate color wheel according Middlebury color code + :return: Color wheel + """ + RY = 15 + YG = 6 + GC = 4 + CB = 11 + BM = 13 + MR = 6 + + ncols = RY + YG + GC + CB + BM + MR + + colorwheel = np.zeros([ncols, 3]) + + col = 0 + + # RY + colorwheel[0:RY, 0] = 255 + colorwheel[0:RY, 1] = np.transpose(np.floor(255*np.arange(0, RY) / RY)) + col += RY + + # YG + colorwheel[col:col+YG, 0] = 255 - np.transpose(np.floor(255*np.arange(0, YG) / YG)) + colorwheel[col:col+YG, 1] = 255 + col += YG + + # GC + colorwheel[col:col+GC, 1] = 255 + colorwheel[col:col+GC, 2] = np.transpose(np.floor(255*np.arange(0, GC) / GC)) + col += GC + + # CB + colorwheel[col:col+CB, 1] = 255 - np.transpose(np.floor(255*np.arange(0, CB) / CB)) + colorwheel[col:col+CB, 2] = 255 + col += CB + + # BM + colorwheel[col:col+BM, 2] = 255 + colorwheel[col:col+BM, 0] = np.transpose(np.floor(255*np.arange(0, BM) / BM)) + col += + BM + + # MR + colorwheel[col:col+MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) + colorwheel[col:col+MR, 0] = 255 + + return colorwheel diff --git a/Codes/flownet2/src/flownet2/__init__.py b/Codes/flownet2/src/flownet2/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/flownet2/__init__.py diff --git a/Codes/flownet2/src/flownet2/flownet2.py b/Codes/flownet2/src/flownet2/flownet2.py new file mode 100644 index 0000000..d44ed10 --- /dev/null +++ b/Codes/flownet2/src/flownet2/flownet2.py @@ -0,0 +1,118 @@ +from ..net import Net, Mode +from ..flownet_css.flownet_css import FlowNetCSS +from ..flownet_sd.flownet_sd import FlowNetSD +from ..flow_warp import flow_warp +from ..utils import LeakyReLU, average_endpoint_error, pad, antipad +from ..downsample import 
downsample +import tensorflow as tf +slim = tf.contrib.slim + + +class FlowNet2(Net): + + def __init__(self, mode=Mode.TRAIN, debug=False): + self.net_css = FlowNetCSS(mode, debug) + self.net_sd = FlowNetSD(mode, debug) + super(FlowNet2, self).__init__(mode=mode, debug=debug) + + def model(self, inputs, training_schedule, trainable=True): + _, height, width, _ = inputs['input_a'].shape.as_list() + with tf.variable_scope('FlowNet2'): + # Forward pass through FlowNetCSS and FlowNetSD with weights frozen + net_css_predictions = self.net_css.model(inputs, training_schedule, trainable=True) + net_sd_predictions = self.net_sd.model(inputs, training_schedule, trainable=True) + + def ChannelNorm(tensor): + sq = tf.square(tensor) + r_sum = tf.reduce_sum(sq, keep_dims=True, axis=3) + return tf.sqrt(r_sum) + + sd_flow_norm = ChannelNorm(net_sd_predictions['flow']) + css_flow_norm = ChannelNorm(net_css_predictions['flow']) + + flow_warp_sd = flow_warp(inputs['input_b'], net_sd_predictions['flow']) + img_diff_sd = inputs['input_a'] - flow_warp_sd + img_diff_sd_norm = ChannelNorm(img_diff_sd) + + flow_warp_css = flow_warp(inputs['input_b'], net_css_predictions['flow']) + img_diff_css = inputs['input_a'] - flow_warp_css + img_diff_css_norm = ChannelNorm(img_diff_css) + + input_to_fusion = tf.concat([inputs['input_a'], + net_sd_predictions['flow'], + net_css_predictions['flow'], + sd_flow_norm, + css_flow_norm, + img_diff_sd_norm, + img_diff_css_norm], axis=3) + + # Fusion Network + with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], + # Only backprop this network if trainable + trainable=trainable, + # He (aka MSRA) weight initialization + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=LeakyReLU, + # We will do our own padding to match the original Caffe code + padding='VALID'): + + weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay']) + with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer): + fuse_conv0 = slim.conv2d(pad(input_to_fusion), 64, 3, scope='fuse_conv0') + fuse_conv1 = slim.conv2d(pad(fuse_conv0), 64, 3, stride=2, scope='fuse_conv1') + fuse_conv1_1 = slim.conv2d(pad(fuse_conv1), 128, 3, scope='fuse_conv1_1') + fuse_conv2 = slim.conv2d(pad(fuse_conv1_1), 128, 3, + stride=2, scope='fuse_conv2') + fuse_conv2_1 = slim.conv2d(pad(fuse_conv2), 128, 3, scope='fuse_conv2_1') + + predict_flow2 = slim.conv2d(pad(fuse_conv2_1), 2, 3, + scope='predict_flow2', + activation_fn=None) + fuse_deconv1 = antipad(slim.conv2d_transpose(fuse_conv2_1, 32, 4, + stride=2, + scope='fuse_deconv1')) + fuse_upsample_flow2to1 = antipad(slim.conv2d_transpose(predict_flow2, 2, 4, + stride=2, + scope='fuse_upsample_flow2to1', + activation_fn=None)) + concat1 = tf.concat([fuse_conv1_1, fuse_deconv1, + fuse_upsample_flow2to1], axis=3) + fuse_interconv1 = slim.conv2d(pad(concat1), 32, 3, + activation_fn=None, scope='fuse_interconv1') + + predict_flow1 = slim.conv2d(pad(fuse_interconv1), 2, 3, + scope='predict_flow1', + activation_fn=None) + fuse_deconv0 = antipad(slim.conv2d_transpose(concat1, 16, 4, + stride=2, + scope='fuse_deconv0')) + fuse_upsample_flow1to0 = antipad(slim.conv2d_transpose(predict_flow1, 2, 4, + stride=2, + scope='fuse_upsample_flow1to0', + activation_fn=None)) + concat0 = tf.concat([fuse_conv0, fuse_deconv0, fuse_upsample_flow1to0], axis=3) + fuse_interconv0 = slim.conv2d(pad(concat0), 16, 3, + activation_fn=None, scope='fuse_interconv0') + + predict_flow0 = slim.conv2d(pad(fuse_interconv0), 2, + 3, activation_fn=None, 
scope='predict_flow0') + + flow = tf.image.resize_bilinear( + predict_flow0, tf.stack([height, width]), align_corners=True) + print(predict_flow0) + print(flow) + return { + 'predict_flow0': predict_flow0, + 'flow': flow, + } + + def loss(self, flow, predictions): + # L2 loss between predict_flow0, true flow (weighted w/ 0.005) + predict_flow0 = predictions['predict_flow0'] + size = [predict_flow0.shape[1], predict_flow0.shape[2]] + downsampled_flow0 = downsample(flow, size) + loss = average_endpoint_error(downsampled_flow0, predict_flow0) + tf.losses.add_loss(loss) + + # Return the 'total' loss: loss fns + regularization terms defined in the model + return tf.losses.get_total_loss() diff --git a/Codes/flownet2/src/flownet2/test.py b/Codes/flownet2/src/flownet2/test.py new file mode 100644 index 0000000..3177614 --- /dev/null +++ b/Codes/flownet2/src/flownet2/test.py @@ -0,0 +1,51 @@ +import argparse +import os +from ..net import Mode +from .flownet2 import FlowNet2 + +FLAGS = None + + +def main(): + # Create a new network + net = FlowNet2(mode=Mode.TEST) + + # Train on the data + net.test( + checkpoint='./checkpoints/FlowNet2/flownet-2.ckpt-0', + input_a_path=FLAGS.input_a, + input_b_path=FLAGS.input_b, + out_path=FLAGS.out, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input_a', + type=str, + required=True, + help='Path to first image' + ) + parser.add_argument( + '--input_b', + type=str, + required=True, + help='Path to second image' + ) + parser.add_argument( + '--out', + type=str, + required=True, + help='Path to output flow result' + ) + FLAGS = parser.parse_args() + + # Verify arguments are valid + if not os.path.exists(FLAGS.input_a): + raise ValueError('image_a path must exist') + if not os.path.exists(FLAGS.input_b): + raise ValueError('image_b path must exist') + if not os.path.isdir(FLAGS.out): + raise ValueError('out directory must exist') + main() diff --git a/Codes/flownet2/src/flownet2/train.py b/Codes/flownet2/src/flownet2/train.py new file mode 100644 index 0000000..40c028d --- /dev/null +++ b/Codes/flownet2/src/flownet2/train.py @@ -0,0 +1,24 @@ +from ..dataloader import load_batch +from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG +from ..training_schedules import LONG_SCHEDULE +from .flownet2 import FlowNet2 + +# Create a new network +net = FlowNet2() + +# Load a batch of data +input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step) + +# Train on the data +net.train( + log_dir='./logs/flownet_2', + training_schedule=LONG_SCHEDULE, + input_a=input_a, + input_b=input_b, + flow=flow, + # Load trained weights for CSS and SD parts of network + checkpoints={ + './checkpoints/FlowNetCSS-ft-sd/flownet-CSS-ft-sd.ckpt-0': ('FlowNet2/FlowNetCSS', 'FlowNet2'), + './checkpoints/FlowNetSD/flownet-SD.ckpt-0': ('FlowNet2/FlowNetSD', 'FlowNet2') + } +) diff --git a/Codes/flownet2/src/flownet_c/__init__.py b/Codes/flownet2/src/flownet_c/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/flownet_c/__init__.py diff --git a/Codes/flownet2/src/flownet_c/flownet_c.py b/Codes/flownet2/src/flownet_c/flownet_c.py new file mode 100644 index 0000000..d333ee2 --- /dev/null +++ b/Codes/flownet2/src/flownet_c/flownet_c.py @@ -0,0 +1,167 @@ +from ..net import Net, Mode +from ..utils import LeakyReLU, average_endpoint_error, pad, antipad +from ..correlation import correlation +from ..downsample import downsample +import math +import tensorflow as tf +slim = 
tf.contrib.slim + + +class FlowNetC(Net): + + def __init__(self, mode=Mode.TRAIN, debug=False): + super(FlowNetC, self).__init__(mode=mode, debug=debug) + + def model(self, inputs, training_schedule, trainable=True): + _, height, width, _ = inputs['input_a'].shape.as_list() + with tf.variable_scope('FlowNetC'): + with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], + # Only backprop this network if trainable + trainable=trainable, + # He (aka MSRA) weight initialization + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=LeakyReLU, + # We will do our own padding to match the original Caffe code + padding='VALID'): + + weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay']) + with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer): + with slim.arg_scope([slim.conv2d], stride=2): + conv_a_1 = slim.conv2d(pad(inputs['input_a'], 3), 64, 7, scope='conv1') + conv_a_2 = slim.conv2d(pad(conv_a_1, 2), 128, 5, scope='conv2') + conv_a_3 = slim.conv2d(pad(conv_a_2, 2), 256, 5, scope='conv3') + + conv_b_1 = slim.conv2d(pad(inputs['input_b'], 3), + 64, 7, scope='conv1', reuse=True) + conv_b_2 = slim.conv2d(pad(conv_b_1, 2), 128, 5, scope='conv2', reuse=True) + conv_b_3 = slim.conv2d(pad(conv_b_2, 2), 256, 5, scope='conv3', reuse=True) + + # Compute cross correlation with leaky relu activation + cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20) + cc_relu = LeakyReLU(cc) + + # Combine cross correlation results with convolution of feature map A + netA_conv = slim.conv2d(conv_a_3, 32, 1, scope='conv_redir') + # Concatenate along the channels axis + net = tf.concat([netA_conv, cc_relu], axis=3) + + conv3_1 = slim.conv2d(pad(net), 256, 3, scope='conv3_1') + with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3): + conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4') + conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1') + conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5') + conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1') + conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6') + conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1') + + """ START: Refinement Network """ + with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None): + predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3, + scope='predict_flow6', + activation_fn=None) + + deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4, + stride=2, + scope='deconv5')) + upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4, + stride=2, + scope='upsample_flow6to5', + activation_fn=None)) + concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3) + + predict_flow5 = slim.conv2d(pad(concat5), 2, 3, + scope='predict_flow5', + activation_fn=None) + deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4, + stride=2, + scope='deconv4')) + upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4, + stride=2, + scope='upsample_flow5to4', + activation_fn=None)) + concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3) + + predict_flow4 = slim.conv2d(pad(concat4), 2, 3, + scope='predict_flow4', + activation_fn=None) + deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4, + stride=2, + scope='deconv3')) + upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4, + stride=2, + scope='upsample_flow4to3', + activation_fn=None)) + concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3) + + predict_flow3 = slim.conv2d(pad(concat3), 2, 3, + scope='predict_flow3', + 
activation_fn=None) + deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4, + stride=2, + scope='deconv2')) + upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4, + stride=2, + scope='upsample_flow3to2', + activation_fn=None)) + concat2 = tf.concat([conv_a_2, deconv2, upsample_flow3to2], axis=3) + + predict_flow2 = slim.conv2d(pad(concat2), 2, 3, + scope='predict_flow2', + activation_fn=None) + """ END: Refinement Network """ + + flow = predict_flow2 * 20.0 + # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different + flow = tf.image.resize_bilinear(flow, + tf.stack([height, width]), + align_corners=True) + + return { + 'predict_flow6': predict_flow6, + 'predict_flow5': predict_flow5, + 'predict_flow4': predict_flow4, + 'predict_flow3': predict_flow3, + 'predict_flow2': predict_flow2, + 'flow': flow, + } + + def loss(self, flow, predictions): + flow = flow * 0.05 + + losses = [] + INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value) + + # L2 loss between predict_flow6, blob23 (weighted w/ 0.32) + predict_flow6 = predictions['predict_flow6'] + size = [predict_flow6.shape[1], predict_flow6.shape[2]] + downsampled_flow6 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow6, predict_flow6)) + + # L2 loss between predict_flow5, blob28 (weighted w/ 0.08) + predict_flow5 = predictions['predict_flow5'] + size = [predict_flow5.shape[1], predict_flow5.shape[2]] + downsampled_flow5 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow5, predict_flow5)) + + # L2 loss between predict_flow4, blob33 (weighted w/ 0.02) + predict_flow4 = predictions['predict_flow4'] + size = [predict_flow4.shape[1], predict_flow4.shape[2]] + downsampled_flow4 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow4, predict_flow4)) + + # L2 loss between predict_flow3, blob38 (weighted w/ 0.01) + predict_flow3 = predictions['predict_flow3'] + size = [predict_flow3.shape[1], predict_flow3.shape[2]] + downsampled_flow3 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow3, predict_flow3)) + + # L2 loss between predict_flow2, blob43 (weighted w/ 0.005) + predict_flow2 = predictions['predict_flow2'] + size = [predict_flow2.shape[1], predict_flow2.shape[2]] + downsampled_flow2 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow2, predict_flow2)) + + loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005]) + + # Return the 'total' loss: loss fns + regularization terms defined in the model + return tf.losses.get_total_loss() diff --git a/Codes/flownet2/src/flownet_c/test.py b/Codes/flownet2/src/flownet_c/test.py new file mode 100644 index 0000000..692f22d --- /dev/null +++ b/Codes/flownet2/src/flownet_c/test.py @@ -0,0 +1,51 @@ +import argparse +import os +from ..net import Mode +from .flownet_c import FlowNetC + +FLAGS = None + + +def main(): + # Create a new network + net = FlowNetC(mode=Mode.TEST) + + # Train on the data + net.test( + checkpoint='./checkpoints/FlowNetC/flownet-C.ckpt-0', + input_a_path=FLAGS.input_a, + input_b_path=FLAGS.input_b, + out_path=FLAGS.out, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input_a', + type=str, + required=True, + help='Path to first image' + ) + parser.add_argument( + '--input_b', + type=str, + required=True, + help='Path to second image' + ) + parser.add_argument( + '--out', + type=str, + 
required=True, + help='Path to output flow result' + ) + FLAGS = parser.parse_args() + + # Verify arguments are valid + if not os.path.exists(FLAGS.input_a): + raise ValueError('image_a path must exist') + if not os.path.exists(FLAGS.input_b): + raise ValueError('image_b path must exist') + if not os.path.isdir(FLAGS.out): + raise ValueError('out directory must exist') + main() diff --git a/Codes/flownet2/src/flownet_c/train.py b/Codes/flownet2/src/flownet_c/train.py new file mode 100644 index 0000000..9296ac7 --- /dev/null +++ b/Codes/flownet2/src/flownet_c/train.py @@ -0,0 +1,19 @@ +from ..dataloader import load_batch +from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG +from ..training_schedules import LONG_SCHEDULE +from .flownet_c import FlowNetC + +# Create a new network +net = FlowNetC() + +# Load a batch of data +input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step) + +# Train on the data +net.train( + log_dir='./logs/flownet_c', + training_schedule=LONG_SCHEDULE, + input_a=input_a, + input_b=input_b, + flow=flow +) diff --git a/Codes/flownet2/src/flownet_cs/__init__.py b/Codes/flownet2/src/flownet_cs/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/flownet_cs/__init__.py diff --git a/Codes/flownet2/src/flownet_cs/flownet_cs.py b/Codes/flownet2/src/flownet_cs/flownet_cs.py new file mode 100644 index 0000000..aeaea47 --- /dev/null +++ b/Codes/flownet2/src/flownet_cs/flownet_cs.py @@ -0,0 +1,41 @@ +from ..net import Net, Mode +from ..flownet_c.flownet_c import FlowNetC +from ..flownet_s.flownet_s import FlowNetS +from ..flow_warp import flow_warp +import tensorflow as tf + + +class FlowNetCS(Net): + + def __init__(self, mode=Mode.TRAIN, debug=False): + self.net_c = FlowNetC(mode, debug) + self.net_s = FlowNetS(mode, debug) + super(FlowNetCS, self).__init__(mode=mode, debug=debug) + + def model(self, inputs, training_schedule, trainable=True): + with tf.variable_scope('FlowNetCS'): + # Forward pass through FlowNetC with weights frozen + net_c_predictions = self.net_c.model(inputs, training_schedule, trainable=True) + + # Perform flow warping (to move image B closer to image A based on flow prediction) + warped = flow_warp(inputs['input_b'], net_c_predictions['flow']) + + # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels) + brightness_error = inputs['input_a'] - warped + brightness_error = tf.square(brightness_error) + brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3) + brightness_error = tf.sqrt(brightness_error) + + # Gather all inputs to FlowNetS + inputs_to_s = { + 'input_a': inputs['input_a'], + 'input_b': inputs['input_b'], + 'warped': warped, + 'flow': net_c_predictions['flow'] * 0.05, + 'brightness_error': brightness_error, + } + + return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable) + + def loss(self, flow, predictions): + return self.net_s.loss(flow, predictions) diff --git a/Codes/flownet2/src/flownet_cs/test.py b/Codes/flownet2/src/flownet_cs/test.py new file mode 100644 index 0000000..ae00ff4 --- /dev/null +++ b/Codes/flownet2/src/flownet_cs/test.py @@ -0,0 +1,51 @@ +import argparse +import os +from ..net import Mode +from .flownet_cs import FlowNetCS + +FLAGS = None + + +def main(): + # Create a new network + net = FlowNetCS(mode=Mode.TEST) + + # Train on the data + net.test( + checkpoint='./checkpoints/FlowNetCS/flownet-CS.ckpt-0', + input_a_path=FLAGS.input_a, + input_b_path=FLAGS.input_b, + 
out_path=FLAGS.out, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input_a', + type=str, + required=True, + help='Path to first image' + ) + parser.add_argument( + '--input_b', + type=str, + required=True, + help='Path to second image' + ) + parser.add_argument( + '--out', + type=str, + required=True, + help='Path to output flow result' + ) + FLAGS = parser.parse_args() + + # Verify arguments are valid + if not os.path.exists(FLAGS.input_a): + raise ValueError('image_a path must exist') + if not os.path.exists(FLAGS.input_b): + raise ValueError('image_b path must exist') + if not os.path.isdir(FLAGS.out): + raise ValueError('out directory must exist') + main() diff --git a/Codes/flownet2/src/flownet_cs/train.py b/Codes/flownet2/src/flownet_cs/train.py new file mode 100644 index 0000000..9376132 --- /dev/null +++ b/Codes/flownet2/src/flownet_cs/train.py @@ -0,0 +1,21 @@ +from ..dataloader import load_batch +from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG +from ..training_schedules import LONG_SCHEDULE +from .flownet_cs import FlowNetCS + +# Create a new network +net = FlowNetCS() + +# Load a batch of data +input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step) + +# Train on the data +net.train( + log_dir='./logs/flownet_cs', + training_schedule=LONG_SCHEDULE, + input_a=input_a, + input_b=input_b, + flow=flow, + # Load trained weights for C part of network + checkpoints={'./checkpoints/FlowNetC/flownet-C.ckpt-0': ('FlowNetCS/FlowNetC', 'FlowNetCS')} +) diff --git a/Codes/flownet2/src/flownet_css/__init__.py b/Codes/flownet2/src/flownet_css/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/flownet_css/__init__.py diff --git a/Codes/flownet2/src/flownet_css/flownet_css.py b/Codes/flownet2/src/flownet_css/flownet_css.py new file mode 100644 index 0000000..93d9db2 --- /dev/null +++ b/Codes/flownet2/src/flownet_css/flownet_css.py @@ -0,0 +1,41 @@ +from ..net import Net, Mode +from ..flownet_cs.flownet_cs import FlowNetCS +from ..flownet_s.flownet_s import FlowNetS +from ..flow_warp import flow_warp +import tensorflow as tf + + +class FlowNetCSS(Net): + + def __init__(self, mode=Mode.TRAIN, debug=False): + self.net_cs = FlowNetCS(mode, debug) + self.net_s = FlowNetS(mode, debug) + super(FlowNetCSS, self).__init__(mode=mode, debug=debug) + + def model(self, inputs, training_schedule, trainable=True): + with tf.variable_scope('FlowNetCSS'): + # Forward pass through FlowNetCS with weights frozen + net_cs_predictions = self.net_cs.model(inputs, training_schedule, trainable=True) + + # Perform flow warping (to move image B closer to image A based on flow prediction) + warped = flow_warp(inputs['input_b'], net_cs_predictions['flow']) + + # Compute brightness error: sqrt(sum (input_a - warped)^2 over channels) + brightness_error = inputs['input_a'] - warped + brightness_error = tf.square(brightness_error) + brightness_error = tf.reduce_sum(brightness_error, keep_dims=True, axis=3) + brightness_error = tf.sqrt(brightness_error) + + # Gather all inputs to FlowNetS + inputs_to_s = { + 'input_a': inputs['input_a'], + 'input_b': inputs['input_b'], + 'warped': warped, + 'flow': net_cs_predictions['flow'] * 0.05, + 'brightness_error': brightness_error, + } + + return self.net_s.model(inputs_to_s, training_schedule, trainable=trainable) + + def loss(self, flow, predictions): + return self.net_s.loss(flow, predictions) diff --git 
a/Codes/flownet2/src/flownet_css/test.py b/Codes/flownet2/src/flownet_css/test.py new file mode 100644 index 0000000..9d1249e --- /dev/null +++ b/Codes/flownet2/src/flownet_css/test.py @@ -0,0 +1,51 @@ +import argparse +import os +from ..net import Mode +from .flownet_css import FlowNetCSS + +FLAGS = None + + +def main(): + # Create a new network + net = FlowNetCSS(mode=Mode.TEST) + + # Train on the data + net.test( + checkpoint='./checkpoints/FlowNetCSS/flownet-CSS.ckpt-0', + input_a_path=FLAGS.input_a, + input_b_path=FLAGS.input_b, + out_path=FLAGS.out, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input_a', + type=str, + required=True, + help='Path to first image' + ) + parser.add_argument( + '--input_b', + type=str, + required=True, + help='Path to second image' + ) + parser.add_argument( + '--out', + type=str, + required=True, + help='Path to output flow result' + ) + FLAGS = parser.parse_args() + + # Verify arguments are valid + if not os.path.exists(FLAGS.input_a): + raise ValueError('image_a path must exist') + if not os.path.exists(FLAGS.input_b): + raise ValueError('image_b path must exist') + if not os.path.isdir(FLAGS.out): + raise ValueError('out directory must exist') + main() diff --git a/Codes/flownet2/src/flownet_css/train.py b/Codes/flownet2/src/flownet_css/train.py new file mode 100644 index 0000000..2964f3e --- /dev/null +++ b/Codes/flownet2/src/flownet_css/train.py @@ -0,0 +1,22 @@ +from ..dataloader import load_batch +from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG +from ..training_schedules import LONG_SCHEDULE +from .flownet_css import FlowNetCSS + +# Create a new network +net = FlowNetCSS() + +# Load a batch of data +input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step) + +# Train on the data +net.train( + log_dir='./logs/flownet_css', + training_schedule=LONG_SCHEDULE, + input_a=input_a, + input_b=input_b, + flow=flow, + # Load trained weights for CS part of network + checkpoints={ + './checkpoints/FlowNetCS/flownet-CS.ckpt-0': ('FlowNetCSS/FlowNetCS', 'FlowNetCSS')} +) diff --git a/Codes/flownet2/src/flownet_s/__init__.py b/Codes/flownet2/src/flownet_s/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/flownet_s/__init__.py diff --git a/Codes/flownet2/src/flownet_s/flownet_s.py b/Codes/flownet2/src/flownet_s/flownet_s.py new file mode 100644 index 0000000..f6704b1 --- /dev/null +++ b/Codes/flownet2/src/flownet_s/flownet_s.py @@ -0,0 +1,161 @@ +from ..net import Net, Mode +from ..utils import LeakyReLU, average_endpoint_error, pad, antipad +from ..downsample import downsample +import math +import tensorflow as tf +slim = tf.contrib.slim + + +class FlowNetS(Net): + + def __init__(self, mode=Mode.TRAIN, debug=False): + super(FlowNetS, self).__init__(mode=mode, debug=debug) + + def model(self, inputs, training_schedule, trainable=True): + _, height, width, _ = inputs['input_a'].shape.as_list() + stacked = False + with tf.variable_scope('FlowNetS'): + if 'warped' in inputs and 'flow' in inputs and 'brightness_error' in inputs: + stacked = True + concat_inputs = tf.concat([inputs['input_a'], + inputs['input_b'], + inputs['warped'], + inputs['flow'], + inputs['brightness_error']], axis=3) + else: + concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3) + with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], + # Only backprop this network if trainable + trainable=trainable, + # He (aka MSRA) weight 
initialization + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=LeakyReLU, + # We will do our own padding to match the original Caffe code + padding='VALID'): + + weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay']) + with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer): + with slim.arg_scope([slim.conv2d], stride=2): + conv_1 = slim.conv2d(pad(concat_inputs, 3), 64, 7, scope='conv1') + conv_2 = slim.conv2d(pad(conv_1, 2), 128, 5, scope='conv2') + conv_3 = slim.conv2d(pad(conv_2, 2), 256, 5, scope='conv3') + + conv3_1 = slim.conv2d(pad(conv_3), 256, 3, scope='conv3_1') + with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3): + conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4') + conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1') + conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5') + conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1') + conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6') + conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1') + + """ START: Refinement Network """ + with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None): + predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3, + scope='predict_flow6', + activation_fn=None) + deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4, + stride=2, + scope='deconv5')) + upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4, + stride=2, + scope='upsample_flow6to5', + activation_fn=None)) + concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3) + + predict_flow5 = slim.conv2d(pad(concat5), 2, 3, + scope='predict_flow5', + activation_fn=None) + deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4, + stride=2, + scope='deconv4')) + upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4, + stride=2, + scope='upsample_flow5to4', + activation_fn=None)) + concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3) + + predict_flow4 = slim.conv2d(pad(concat4), 2, 3, + scope='predict_flow4', + activation_fn=None) + deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4, + stride=2, + scope='deconv3')) + upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4, + stride=2, + scope='upsample_flow4to3', + activation_fn=None)) + concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3) + + predict_flow3 = slim.conv2d(pad(concat3), 2, 3, + scope='predict_flow3', + activation_fn=None) + deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4, + stride=2, + scope='deconv2')) + upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4, + stride=2, + scope='upsample_flow3to2', + activation_fn=None)) + concat2 = tf.concat([conv_2, deconv2, upsample_flow3to2], axis=3) + + predict_flow2 = slim.conv2d(pad(concat2), 2, 3, + scope='predict_flow2', + activation_fn=None) + """ END: Refinement Network """ + + flow = predict_flow2 * 20.0 + # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different + flow = tf.image.resize_bilinear(flow, + tf.stack([height, width]), + align_corners=True) + + return { + 'predict_flow6': predict_flow6, + 'predict_flow5': predict_flow5, + 'predict_flow4': predict_flow4, + 'predict_flow3': predict_flow3, + 'predict_flow2': predict_flow2, + 'flow': flow, + } + + def loss(self, flow, predictions): + flow = flow * 0.05 + + losses = [] + INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value) + + # L2 loss between predict_flow6, blob23 (weighted w/ 0.32) + 
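+ # The ground-truth flow (scaled by 0.05 above to match the network's output range) is
+ # downsampled to each prediction's resolution and compared via average endpoint error;
+ # the five per-scale errors are combined below with weights 0.32, 0.08, 0.02, 0.01 and
+ # 0.005, from the coarsest prediction (predict_flow6) to the finest (predict_flow2).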
predict_flow6 = predictions['predict_flow6'] + size = [predict_flow6.shape[1], predict_flow6.shape[2]] + downsampled_flow6 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow6, predict_flow6)) + + # L2 loss between predict_flow5, blob28 (weighted w/ 0.08) + predict_flow5 = predictions['predict_flow5'] + size = [predict_flow5.shape[1], predict_flow5.shape[2]] + downsampled_flow5 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow5, predict_flow5)) + + # L2 loss between predict_flow4, blob33 (weighted w/ 0.02) + predict_flow4 = predictions['predict_flow4'] + size = [predict_flow4.shape[1], predict_flow4.shape[2]] + downsampled_flow4 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow4, predict_flow4)) + + # L2 loss between predict_flow3, blob38 (weighted w/ 0.01) + predict_flow3 = predictions['predict_flow3'] + size = [predict_flow3.shape[1], predict_flow3.shape[2]] + downsampled_flow3 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow3, predict_flow3)) + + # L2 loss between predict_flow2, blob43 (weighted w/ 0.005) + predict_flow2 = predictions['predict_flow2'] + size = [predict_flow2.shape[1], predict_flow2.shape[2]] + downsampled_flow2 = downsample(flow, size) + losses.append(average_endpoint_error(downsampled_flow2, predict_flow2)) + + loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005]) + + # Return the 'total' loss: loss fns + regularization terms defined in the model + return tf.losses.get_total_loss() diff --git a/Codes/flownet2/src/flownet_s/test.py b/Codes/flownet2/src/flownet_s/test.py new file mode 100644 index 0000000..ae1b2f3 --- /dev/null +++ b/Codes/flownet2/src/flownet_s/test.py @@ -0,0 +1,51 @@ +import argparse +import os +from ..net import Mode +from .flownet_s import FlowNetS + +FLAGS = None + + +def main(): + # Create a new network + net = FlowNetS(mode=Mode.TEST) + + # Train on the data + net.test( + checkpoint='./checkpoints/FlowNetS/flownet-S.ckpt-0', + input_a_path=FLAGS.input_a, + input_b_path=FLAGS.input_b, + out_path=FLAGS.out, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input_a', + type=str, + required=True, + help='Path to first image' + ) + parser.add_argument( + '--input_b', + type=str, + required=True, + help='Path to second image' + ) + parser.add_argument( + '--out', + type=str, + required=True, + help='Path to output flow result' + ) + FLAGS = parser.parse_args() + + # Verify arguments are valid + if not os.path.exists(FLAGS.input_a): + raise ValueError('image_a path must exist') + if not os.path.exists(FLAGS.input_b): + raise ValueError('image_b path must exist') + if not os.path.isdir(FLAGS.out): + raise ValueError('out directory must exist') + main() diff --git a/Codes/flownet2/src/flownet_s/train.py b/Codes/flownet2/src/flownet_s/train.py new file mode 100644 index 0000000..13a792a --- /dev/null +++ b/Codes/flownet2/src/flownet_s/train.py @@ -0,0 +1,19 @@ +from ..dataloader import load_batch +from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG +from ..training_schedules import LONG_SCHEDULE +from .flownet_s import FlowNetS + +# Create a new network +net = FlowNetS() + +# Load a batch of data +input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step) + +# Train on the data +net.train( + log_dir='./logs/flownet_s_sample', + training_schedule=LONG_SCHEDULE, + input_a=input_a, + input_b=input_b, + flow=flow +) diff 
--git a/Codes/flownet2/src/flownet_sd/__init__.py b/Codes/flownet2/src/flownet_sd/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/flownet_sd/__init__.py diff --git a/Codes/flownet2/src/flownet_sd/flownet_sd.py b/Codes/flownet2/src/flownet_sd/flownet_sd.py new file mode 100644 index 0000000..2f5c9e4 --- /dev/null +++ b/Codes/flownet2/src/flownet_sd/flownet_sd.py @@ -0,0 +1,160 @@ +from ..net import Net, Mode +from ..utils import LeakyReLU, average_endpoint_error, pad, antipad +# from ..downsample import downsample +import math +import tensorflow as tf +slim = tf.contrib.slim + + +class FlowNetSD(Net): + + def __init__(self, mode=Mode.TRAIN, debug=False): + super(FlowNetSD, self).__init__(mode=mode, debug=debug) + + def model(self, inputs, training_schedule, trainable=True, reuse=None): + _, height, width, _ = inputs['input_a'].shape.as_list() + with tf.variable_scope('FlowNetSD', reuse=reuse): + concat_inputs = tf.concat([inputs['input_a'], inputs['input_b']], axis=3) + with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], + # Only backprop this network if trainable + trainable=trainable, + # He (aka MSRA) weight initialization + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=LeakyReLU, + # We will do our own padding to match the original Caffe code + padding='VALID'): + + weights_regularizer = slim.l2_regularizer(training_schedule['weight_decay']) + with slim.arg_scope([slim.conv2d], weights_regularizer=weights_regularizer): + conv0 = slim.conv2d(pad(concat_inputs), 64, 3, scope='conv0') + conv1 = slim.conv2d(pad(conv0), 64, 3, stride=2, scope='conv1') + conv1_1 = slim.conv2d(pad(conv1), 128, 3, scope='conv1_1') + conv2 = slim.conv2d(pad(conv1_1), 128, 3, stride=2, scope='conv2') + conv2_1 = slim.conv2d(pad(conv2), 128, 3, scope='conv2_1') + conv3 = slim.conv2d(pad(conv2_1), 256, 3, stride=2, scope='conv3') + conv3_1 = slim.conv2d(pad(conv3), 256, 3, scope='conv3_1') + conv4 = slim.conv2d(pad(conv3_1), 512, 3, stride=2, scope='conv4') + conv4_1 = slim.conv2d(pad(conv4), 512, 3, scope='conv4_1') + conv5 = slim.conv2d(pad(conv4_1), 512, 3, stride=2, scope='conv5') + conv5_1 = slim.conv2d(pad(conv5), 512, 3, scope='conv5_1') + conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope='conv6') + conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1') + + """ START: Refinement Network """ + with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None): + predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3, + scope='predict_flow6', + activation_fn=None) + deconv5 = antipad(slim.conv2d_transpose(conv6_1, 512, 4, + stride=2, + scope='deconv5')) + upsample_flow6to5 = antipad(slim.conv2d_transpose(predict_flow6, 2, 4, + stride=2, + scope='upsample_flow6to5', + activation_fn=None)) + concat5 = tf.concat([conv5_1, deconv5, upsample_flow6to5], axis=3) + interconv5 = slim.conv2d(pad(concat5), 512, 3, + activation_fn=None, scope='interconv5') + + predict_flow5 = slim.conv2d(pad(interconv5), 2, 3, + scope='predict_flow5', + activation_fn=None) + deconv4 = antipad(slim.conv2d_transpose(concat5, 256, 4, + stride=2, + scope='deconv4')) + upsample_flow5to4 = antipad(slim.conv2d_transpose(predict_flow5, 2, 4, + stride=2, + scope='upsample_flow5to4', + activation_fn=None)) + concat4 = tf.concat([conv4_1, deconv4, upsample_flow5to4], axis=3) + interconv4 = slim.conv2d(pad(concat4), 256, 3, + activation_fn=None, scope='interconv4') + + predict_flow4 = slim.conv2d(pad(interconv4), 2, 3, + scope='predict_flow4', + 
activation_fn=None) + deconv3 = antipad(slim.conv2d_transpose(concat4, 128, 4, + stride=2, + scope='deconv3')) + upsample_flow4to3 = antipad(slim.conv2d_transpose(predict_flow4, 2, 4, + stride=2, + scope='upsample_flow4to3', + activation_fn=None)) + concat3 = tf.concat([conv3_1, deconv3, upsample_flow4to3], axis=3) + interconv3 = slim.conv2d(pad(concat3), 128, 3, + activation_fn=None, scope='interconv3') + + predict_flow3 = slim.conv2d(pad(interconv3), 2, 3, + scope='predict_flow3', + activation_fn=None) + deconv2 = antipad(slim.conv2d_transpose(concat3, 64, 4, + stride=2, + scope='deconv2')) + upsample_flow3to2 = antipad(slim.conv2d_transpose(predict_flow3, 2, 4, + stride=2, + scope='upsample_flow3to2', + activation_fn=None)) + concat2 = tf.concat([conv2, deconv2, upsample_flow3to2], axis=3) + interconv2 = slim.conv2d(pad(concat2), 64, 3, + activation_fn=None, scope='interconv2') + + predict_flow2 = slim.conv2d(pad(interconv2), 2, 3, + scope='predict_flow2', + activation_fn=None) + """ END: Refinement Network """ + + flow = predict_flow2 * 0.05 + # TODO: Look at Accum (train) or Resample (deploy) to see if we need to do something different + flow = tf.image.resize_bilinear(flow, + tf.stack([height, width]), + align_corners=True) + + return { + 'predict_flow6': predict_flow6, + 'predict_flow5': predict_flow5, + 'predict_flow4': predict_flow4, + 'predict_flow3': predict_flow3, + 'predict_flow2': predict_flow2, + 'flow': flow, + } + + # def loss(self, flow, predictions): + # flow = flow * 20.0 + # + # losses = [] + # INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value) + # + # # L2 loss between predict_flow6, blob23 (weighted w/ 0.32) + # predict_flow6 = predictions['predict_flow6'] + # size = [predict_flow6.shape[1], predict_flow6.shape[2]] + # downsampled_flow6 = downsample(flow, size) + # losses.append(average_endpoint_error(downsampled_flow6, predict_flow6)) + # + # # L2 loss between predict_flow5, blob28 (weighted w/ 0.08) + # predict_flow5 = predictions['predict_flow5'] + # size = [predict_flow5.shape[1], predict_flow5.shape[2]] + # downsampled_flow5 = downsample(flow, size) + # losses.append(average_endpoint_error(downsampled_flow5, predict_flow5)) + # + # # L2 loss between predict_flow4, blob33 (weighted w/ 0.02) + # predict_flow4 = predictions['predict_flow4'] + # size = [predict_flow4.shape[1], predict_flow4.shape[2]] + # downsampled_flow4 = downsample(flow, size) + # losses.append(average_endpoint_error(downsampled_flow4, predict_flow4)) + # + # # L2 loss between predict_flow3, blob38 (weighted w/ 0.01) + # predict_flow3 = predictions['predict_flow3'] + # size = [predict_flow3.shape[1], predict_flow3.shape[2]] + # downsampled_flow3 = downsample(flow, size) + # losses.append(average_endpoint_error(downsampled_flow3, predict_flow3)) + # + # # L2 loss between predict_flow2, blob43 (weighted w/ 0.005) + # predict_flow2 = predictions['predict_flow2'] + # size = [predict_flow2.shape[1], predict_flow2.shape[2]] + # downsampled_flow2 = downsample(flow, size) + # losses.append(average_endpoint_error(downsampled_flow2, predict_flow2)) + # + # loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005]) + # + # # Return the 'total' loss: loss fns + regularization terms defined in the model + # return tf.losses.get_total_loss() diff --git a/Codes/flownet2/src/flownet_sd/test.py b/Codes/flownet2/src/flownet_sd/test.py new file mode 100644 index 0000000..b2ac285 --- /dev/null +++ b/Codes/flownet2/src/flownet_sd/test.py @@ -0,0 +1,51 @@ +import 
argparse +import os +from ..net import Mode +from .flownet_sd import FlowNetSD + +FLAGS = None + + +def main(): + # Create a new network + net = FlowNetSD(mode=Mode.TEST) + + # Train on the data + net.test( + checkpoint='./checkpoints/FlowNetSD/flownet-SD.ckpt-0', + input_a_path=FLAGS.input_a, + input_b_path=FLAGS.input_b, + out_path=FLAGS.out, + ) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input_a', + type=str, + required=True, + help='Path to first image' + ) + parser.add_argument( + '--input_b', + type=str, + required=True, + help='Path to second image' + ) + parser.add_argument( + '--out', + type=str, + required=True, + help='Path to output flow result' + ) + FLAGS = parser.parse_args() + + # Verify arguments are valid + if not os.path.exists(FLAGS.input_a): + raise ValueError('image_a path must exist') + if not os.path.exists(FLAGS.input_b): + raise ValueError('image_b path must exist') + if not os.path.isdir(FLAGS.out): + raise ValueError('out directory must exist') + main() diff --git a/Codes/flownet2/src/flownet_sd/train.py b/Codes/flownet2/src/flownet_sd/train.py new file mode 100644 index 0000000..86c64e5 --- /dev/null +++ b/Codes/flownet2/src/flownet_sd/train.py @@ -0,0 +1,19 @@ +from ..dataloader import load_batch +from ..dataset_configs import FLYING_CHAIRS_DATASET_CONFIG +from ..training_schedules import LONG_SCHEDULE +from .flownet_sd import FlowNetSD + +# Create a new network +net = FlowNetSD() + +# Load a batch of data +input_a, input_b, flow = load_batch(FLYING_CHAIRS_DATASET_CONFIG, 'sample', net.global_step) + +# Train on the data +net.train( + log_dir='./logs/flownet_sd_sample', + training_schedule=LONG_SCHEDULE, + input_a=input_a, + input_b=input_b, + flow=flow +) diff --git a/Codes/flownet2/src/net.py b/Codes/flownet2/src/net.py new file mode 100644 index 0000000..43b2193 --- /dev/null +++ b/Codes/flownet2/src/net.py @@ -0,0 +1,177 @@ +import abc +from enum import Enum +import os +import tensorflow as tf +from .flowlib import flow_to_image, write_flow +import numpy as np +# from scipy.misc import imread, imsave, imresize +import cv2 +import uuid +from .training_schedules import LONG_SCHEDULE +slim = tf.contrib.slim + +os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID" +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class Mode(Enum): + TRAIN = 1 + TEST = 2 + + +class Net(object): + __metaclass__ = abc.ABCMeta + + def __init__(self, mode=Mode.TRAIN, debug=False): + self.global_step = slim.get_or_create_global_step() + self.mode = mode + self.debug = debug + + @abc.abstractmethod + def model(self, inputs, training_schedule, trainable=True): + """ + Defines the model and returns a tuple of Tensors needed for calculating the loss. + """ + return + + @abc.abstractmethod + def loss(self, **kwargs): + """ + Accepts prediction Tensors from the output of `model`. + Returns a single Tensor representing the total loss of the model. 
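+ Concrete networks in this package build it from average endpoint errors between the
+ (downsampled) ground-truth flow and the predicted flow(s), plus any regularization
+ terms collected while building the model.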
+ """ + return + """ + python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_6.png \ + --input_b /home/liuwen/ssd/videogan/Save_2017_05_31/Images/ped1_adv/Evaluate/model.ckpt-100000/01/gen_7.png \ + --out ./ + python -m src.flownet_sd.test --input_a 006.png --input_b 007.png --out ./ + python -m src.flownet_sd.test --input_a /home/liuwen/ssd/videogan/ped1/frames/testing/01/006.jpg \ + --input_b /home/liuwen/ssd/videogan/ped1/frames/testing/01/007.jpg \ + --out ./ + """ + def test(self, checkpoint, input_a_path, input_b_path, out_path, save_image=True, save_flo=False): + input_a = cv2.imread(input_a_path) + input_b = cv2.imread(input_b_path) + + input_a = cv2.resize(input_a, (512, 384)) + input_b = cv2.resize(input_b, (512, 384)) + print(input_a.shape, input_b.shape) + + # Convert from RGB -> BGR + # input_a = input_a[..., [2, 1, 0]] + # input_b = input_b[..., [2, 1, 0]] + + # Scale from [0, 255] -> [0.0, 1.0] if needed + if input_a.max() > 1.0: + input_a = input_a / 255.0 + if input_b.max() > 1.0: + input_b = input_b / 255.0 + + # TODO: This is a hack, we should get rid of this + training_schedule = LONG_SCHEDULE + + inputs = { + 'input_a': tf.expand_dims(tf.constant(input_a, dtype=tf.float32), 0), + 'input_b': tf.expand_dims(tf.constant(input_b, dtype=tf.float32), 0), + } + predictions = self.model(inputs, training_schedule) + pred_flow = predictions['flow'] + + saver = tf.train.Saver() + + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + with tf.Session(config=config) as sess: + saver.restore(sess, checkpoint) + pred_flow = sess.run(pred_flow)[0, :, :, :] + + np.save('temporal_ped1', pred_flow) + + unique_name = 'flow-' + str(uuid.uuid4()) + if save_image: + flow_img = flow_to_image(pred_flow) + full_out_path = os.path.join(out_path, unique_name + '.png') + cv2.imwrite(full_out_path, flow_img) + + if save_flo: + full_out_path = os.path.join(out_path, unique_name + '.flo') + write_flow(pred_flow, full_out_path) + + def train(self, log_dir, training_schedule, input_a, input_b, flow, checkpoints=None): + tf.summary.image("image_a", input_a, max_outputs=2) + tf.summary.image("image_b", input_b, max_outputs=2) + + self.learning_rate = tf.train.piecewise_constant( + self.global_step, + [tf.cast(v, tf.int64) for v in training_schedule['step_values']], + training_schedule['learning_rates']) + + optimizer = tf.train.AdamOptimizer( + self.learning_rate, + training_schedule['momentum'], + training_schedule['momentum2']) + + inputs = { + 'input_a': input_a, + 'input_b': input_b, + } + predictions = self.model(inputs, training_schedule) + total_loss = self.loss(flow, predictions) + tf.summary.scalar('loss', total_loss) + + if checkpoints: + for (checkpoint_path, (scope, new_scope)) in checkpoints.iteritems(): + variables_to_restore = slim.get_variables(scope=scope) + renamed_variables = { + var.op.name.split(new_scope + '/')[1]: var + for var in variables_to_restore + } + restorer = tf.train.Saver(renamed_variables) + with tf.Session() as sess: + restorer.restore(sess, checkpoint_path) + + # Show the generated flow in TensorBoard + if 'flow' in predictions: + pred_flow_0 = predictions['flow'][0, :, :, :] + pred_flow_0 = tf.py_func(flow_to_image, [pred_flow_0], tf.uint8) + pred_flow_1 = predictions['flow'][1, :, :, :] + pred_flow_1 = tf.py_func(flow_to_image, [pred_flow_1], tf.uint8) + pred_flow_img = tf.stack([pred_flow_0, pred_flow_1], 0) + tf.summary.image('pred_flow', pred_flow_img, max_outputs=2) + + 
true_flow_0 = flow[0, :, :, :] + true_flow_0 = tf.py_func(flow_to_image, [true_flow_0], tf.uint8) + true_flow_1 = flow[1, :, :, :] + true_flow_1 = tf.py_func(flow_to_image, [true_flow_1], tf.uint8) + true_flow_img = tf.stack([true_flow_0, true_flow_1], 0) + tf.summary.image('true_flow', true_flow_img, max_outputs=2) + + train_op = slim.learning.create_train_op( + total_loss, + optimizer, + summarize_gradients=True) + + if self.debug: + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + tf.train.start_queue_runners(sess) + slim.learning.train_step( + sess, + train_op, + self.global_step, + { + 'should_trace': tf.constant(1), + 'should_log': tf.constant(1), + 'logdir': log_dir + '/debug', + } + ) + else: + slim.learning.train( + train_op, + log_dir, + # session_config=tf.ConfigProto(allow_soft_placement=True), + global_step=self.global_step, + save_summaries_secs=60, + number_of_steps=training_schedule['max_iter'] + ) diff --git a/Codes/flownet2/src/ops/build/.gitkeep b/Codes/flownet2/src/ops/build/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/Codes/flownet2/src/ops/build/.gitkeep diff --git a/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc new file mode 100644 index 0000000..4e92f45 --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cc @@ -0,0 +1,160 @@ +#define EIGEN_USE_THREADS + +#include "correlation_kernel.h" +#include "pad.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +template<typename Device> +class CorrelationGradKernel : public OpKernel { + public: + explicit CorrelationGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) { + // Get the attributes + OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad)); + + OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd")); + } + + void Compute(OpKernelContext *ctx) override { + // Get the input images and verify their dimensions + const Tensor& gradients_t = ctx->input(0); + const Tensor& input_a_t = ctx->input(1); + const Tensor& input_b_t = ctx->input(2); + + OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4")); + OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4")); + + // Get dimensions of input + const int batch_size = input_a_t.dim_size(0); + const int in_height = input_a_t.dim_size(1); + const int in_width = input_a_t.dim_size(2); + const int in_channels = input_a_t.dim_size(3); + const int in_count_per_sample = in_height * in_width * in_channels; + const int padded_height = in_height + 2 * pad; + const int padded_width = in_width + 2 * pad; + + // The size of unreachable border region on each side + const int kernel_radius = (kernel_size - 1) / 2; + const int border_size = max_displacement + kernel_radius; + + // Calculate the output dimensions + const int out_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1); + const int out_width = ceil((float)(padded_width - border_size * 
2) / (float)stride_1); + + const int neighborhood_grid_radius = max_displacement / stride_2; + const int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1; + const int out_channels = neighborhood_grid_width * neighborhood_grid_width; + + // Allocate the memory for the outputs + Tensor *output_a_gradient_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_a_t.shape(), &output_a_gradient_t)); + Tensor *output_b_gradient_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(1, input_b_t.shape(), &output_b_gradient_t)); + + // Get the tensors + auto gradients = gradients_t.tensor<float, 4>(); + auto input_a = input_a_t.tensor<float, 4>(); + auto input_b = input_b_t.tensor<float, 4>(); + auto output_a_gradient = output_a_gradient_t->tensor<float, 4>(); + auto output_b_gradient = output_b_gradient_t->tensor<float, 4>(); + + // Create temporary tensors for padded inputs + Tensor padded_input_a_t, padded_input_b_t; + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum<float>::value, + TensorShape({ batch_size, padded_height, padded_width, in_channels }), + &padded_input_a_t)); + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum<float>::value, + TensorShape({ batch_size, padded_height, padded_width, in_channels }), + &padded_input_b_t)); + auto padded_input_a = padded_input_a_t.tensor<float, 4>(); + auto padded_input_b = padded_input_b_t.tensor<float, 4>(); + + // Pad the inputs + Pad(ctx->eigen_device<Device>(), + input_a.data(), + batch_size, + in_height, + in_width, + in_channels, + padded_height, + padded_width, + padded_input_a.data()); + Pad(ctx->eigen_device<Device>(), + input_b.data(), + batch_size, + in_height, + in_width, + in_channels, + padded_height, + padded_width, + padded_input_b.data()); + + CorrelationGradA(ctx->eigen_gpu_device(), + batch_size, + out_width, + out_height, + out_channels, + max_displacement, + neighborhood_grid_radius, + neighborhood_grid_width, + kernel_radius, + stride_1, + stride_2, + in_width, + in_height, + padded_width, + padded_height, + in_channels, + in_count_per_sample, + pad, + padded_input_b.data(), + gradients.data(), + output_a_gradient.data()); + + CorrelationGradB(ctx->eigen_gpu_device(), + batch_size, + out_width, + out_height, + out_channels, + max_displacement, + neighborhood_grid_radius, + neighborhood_grid_width, + kernel_radius, + stride_1, + stride_2, + in_width, + in_height, + padded_width, + padded_height, + in_channels, + in_count_per_sample, + pad, + padded_input_a.data(), + gradients.data(), + output_b_gradient.data()); + } + + private: + int kernel_size; + int max_displacement; + int stride_1; + int stride_2; + int pad; +}; + +REGISTER_KERNEL_BUILDER(Name("CorrelationGrad") + .Device(DEVICE_GPU), + CorrelationGradKernel<GPUDevice>) +} // end namespace tensorflow diff --git a/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc new file mode 100644 index 0000000..19e3a40 --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/correlation_grad_kernel.cu.cc @@ -0,0 +1,262 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#define ROUND_OFF 50000 + +#include <stdio.h> +#include <iostream> + +#include "correlation_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +__global__ void 
CorrelateDataBackward0(const int nthreads, + int item, + int out_width, + int out_height, + int out_channels, + int max_displacement, + int neighborhood_grid_radius, + int neighborhood_grid_width, + int kernel_radius, + int stride_1, + int stride_2, + int in_width, + int in_height, + int padded_in_width, + int padded_in_height, + int in_channels, + int in_count_per_sample, + int pad_size, + float *output_a_gradient, + const float *input_b, + const float *gradient) +{ + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int k = index % in_channels; // channels + int x = (index / in_channels) % in_width + pad_size; // w-pos + int y = (index / in_channels / in_width) % in_height + pad_size; // h-pos + + // Get X,Y ranges and clamp + // round_off is a trick to enable integer division with ceil, even for + // negative numbers + // We use a large offset, for the inner part not to become negative. + const int round_off = ROUND_OFF; + const int round_off_s1 = stride_1 * round_off; + + // We add round_off before_s1 the int division and subtract round_off after + // it, to ensure the formula matches ceil behavior: + int xmin = (x - 2 * kernel_radius - max_displacement + round_off_s1 - 1) / stride_1 + 1 - + round_off; + int ymin = (y - 2 * kernel_radius - max_displacement + round_off_s1 - 1) / stride_1 + 1 - + round_off; + + // Same here: + int xmax = (x - max_displacement + round_off_s1) / stride_1 - round_off; + int ymax = (y - max_displacement + round_off_s1) / stride_1 - round_off; + + float sum = 0; + + if ((xmax >= 0) && (ymax >= 0) && (xmin <= out_width - 1) && (ymin <= out_height - 1)) { + xmin = max(0, xmin); + xmax = min(out_width - 1, xmax); + + ymin = max(0, ymin); + ymax = min(out_height - 1, ymax); + + for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) { + for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) { + // Get input_b data: + int s2o = stride_2 * o; + int s2p = stride_2 * p; + int idx_input_b = ((item * padded_in_height + (y + s2p)) * padded_in_width + (x + s2o)) * + in_channels + k; + float input_b_tmp = input_b[idx_input_b]; // input_b[x+s2o,y+s2p,k] + + // Index offset for gradient in following loops: + int op = (p + neighborhood_grid_radius) * neighborhood_grid_width + + (o + neighborhood_grid_radius); // index [o,p] + + for (int y = ymin; y <= ymax; y++) { + for (int x = xmin; x <= xmax; x++) { + // gradient[x,y,o,p] + int idx_gradient = ((item * out_height + y) * out_width + x) * out_channels + op; + sum += gradient[idx_gradient] * input_b_tmp; + } + } + } + } + } + const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * in_channels; + const int input_a_idx = ((y - pad_size) * in_width + (x - pad_size)) * in_channels + k; + output_a_gradient[input_a_idx + item * in_count_per_sample] = sum / (float)sumelems; + } +} + +__global__ void CorrelateDataBackward1(const int nthreads, + int item, + int out_width, + int out_height, + int out_channels, + int max_displacement, + int neighborhood_grid_radius, + int neighborhood_grid_width, + int kernel_radius, + int stride_1, + int stride_2, + int in_width, + int in_height, + int padded_in_width, + int padded_in_height, + int in_channels, + int in_count_per_sample, + int pad_size, + float *output_b_gradient, + const float *input_a, + const float *gradient) +{ + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int k = index % in_channels; // channels + int x = (index / in_channels) % in_width + pad_size; // w-pos + int y = (index / in_channels / in_width) % in_height + pad_size; // h-pos + 
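+    // Gradient w.r.t. input_b: for this element of input_b at padded position (x, y) and
+    // channel k, every neighborhood offset (o, p) is visited; the output locations whose
+    // shifted correlation windows cover (x, y) each contribute
+    // gradient * input_a[x - s2o, y - s2p, k], and the accumulated sum is normalized by
+    // the correlation patch size (kernel area * channels) at the end.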
+ // round_off is a trick to enable integer division with ceil, even for + // negative numbers + // We use a large offset, for the inner part not to become negative. + const int round_off = ROUND_OFF; + const int round_off_s1 = stride_1 * round_off; + + float sum = 0; + + // Height (y) + for (int p = -neighborhood_grid_radius; p <= neighborhood_grid_radius; p++) { + // Width (x) + for (int o = -neighborhood_grid_radius; o <= neighborhood_grid_radius; o++) { + int s2o = stride_2 * o; + int s2p = stride_2 * p; + + // Get X,Y ranges and clamp + // We add round_off before_s1 the int division and subtract round_off + // after it, to ensure the formula matches ceil behavior: + int xmin = (x - 2 * kernel_radius - max_displacement - s2o + round_off_s1 - 1) / stride_1 + + 1 - round_off; + int ymin = (y - 2 * kernel_radius - max_displacement - s2p + round_off_s1 - 1) / stride_1 + + 1 - round_off; + + // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w) + // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k) + + // Same here: + int xmax = (x - max_displacement - s2o + round_off_s1) / stride_1 - round_off; + int ymax = (y - max_displacement - s2p + round_off_s1) / stride_1 - round_off; + + if ((xmax >= 0) && (ymax >= 0) && (xmin <= out_width - 1) && (ymin <= out_height - 1)) { + xmin = max(0, xmin); + xmax = min(out_width - 1, xmax); + + ymin = max(0, ymin); + ymax = min(out_height - 1, ymax); + + // Get input_a data: + int idx_input_a = ((item * padded_in_height + (y - s2p)) * padded_in_width + (x - s2o)) * + in_channels + k; + float input_a_tmp = input_a[idx_input_a]; + + // Index offset for gradient in following loops: + int op = (p + neighborhood_grid_radius) * neighborhood_grid_width + + (o + neighborhood_grid_radius); // index [o,p] + + for (int y = ymin; y <= ymax; y++) { + for (int x = xmin; x <= xmax; x++) { + int idx_gradient = ((item * out_height + y) * out_width + x) * out_channels + op; + sum += gradient[idx_gradient] * input_a_tmp; + } + } + } + } + } + const int sumelems = (kernel_radius * 2 + 1) * (kernel_radius * 2 + 1) * in_channels; + const int input_b_idx = ((y - pad_size) * in_width + (x - pad_size)) * in_channels + k; + output_b_gradient[input_b_idx + item * in_count_per_sample] = sum / (float)sumelems; + } +} + +void CorrelationGradA(const GPUDevice& device, + const int batch_size, + const int out_width, + const int out_height, + const int out_channels, + const int max_displacement, + const int neighborhood_grid_radius, + const int neighborhood_grid_width, + const int kernel_radius, + const int stride_1, + const int stride_2, + const int in_width, + const int in_height, + const int padded_in_width, + const int padded_in_height, + const int in_channels, + const int in_count_per_sample, // h * w * ch + const int pad, + const float *input_b, + const float *gradient, + float *output_a_gradient) { + CudaLaunchConfig config = GetCudaLaunchConfig(in_count_per_sample, device); + + for (int n = 0; n < batch_size; n++) { + CorrelateDataBackward0 << < config.block_count, config.thread_per_block, 0, + device.stream() >> > ( + in_count_per_sample, + n, out_width, out_height, out_channels, + max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius, + stride_1, stride_2, + in_width, in_height, padded_in_width, padded_in_height, in_channels, in_count_per_sample, pad, + output_a_gradient, input_b, gradient); + } +} + +void CorrelationGradB(const GPUDevice& device, + const int batch_size, + const int out_width, + const int out_height, + const 
int out_channels, + const int max_displacement, + const int neighborhood_grid_radius, + const int neighborhood_grid_width, + const int kernel_radius, + const int stride_1, + const int stride_2, + const int in_width, + const int in_height, + const int padded_in_width, + const int padded_in_height, + const int in_channels, + const int in_count_per_sample, + const int pad, + const float *input_a, + const float *gradient, + float *output_b_gradient) { + CudaLaunchConfig config = GetCudaLaunchConfig(in_count_per_sample, device); + + for (int n = 0; n < batch_size; n++) { + CorrelateDataBackward1 << < config.block_count, config.thread_per_block, 0, + device.stream() >> > ( + in_count_per_sample, + n, out_width, out_height, out_channels, + max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius, + stride_1, stride_2, + in_width, in_height, padded_in_width, padded_in_height, in_channels, in_count_per_sample, pad, + output_b_gradient, input_a, gradient); + } +} +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.cc b/Codes/flownet2/src/ops/correlation/correlation_kernel.cc new file mode 100644 index 0000000..f8a5193 --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.cc @@ -0,0 +1,137 @@ +#define EIGEN_USE_THREADS + +#include <utility> + +#include "correlation_kernel.h" +#include "pad.h" + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +template<typename Device> +class CorrelationKernel : public OpKernel { + public: + explicit CorrelationKernel(OpKernelConstruction *ctx) : OpKernel(ctx) { + // Get the attributes + OP_REQUIRES_OK(ctx, ctx->GetAttr("kernel_size", &kernel_size)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_displacement", &max_displacement)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_1", &stride_1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("stride_2", &stride_2)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pad", &pad)); + + OP_REQUIRES(ctx, kernel_size % 2 != 0, errors::InvalidArgument("kernel_size must be odd")); + } + + void Compute(OpKernelContext *ctx) override { + // Get the input images and transforms and verify their dimensions + const Tensor& input_a_t = ctx->input(0); + const Tensor& input_b_t = ctx->input(1); + + OP_REQUIRES(ctx, input_a_t.dims() == 4, errors::InvalidArgument("input_a must have rank 4")); + OP_REQUIRES(ctx, input_b_t.dims() == 4, errors::InvalidArgument("input_b must have rank 4")); + + // Get dimensions of input (already padded) + int batch_size = input_a_t.dim_size(0); + int input_height = input_a_t.dim_size(1); + int input_width = input_a_t.dim_size(2); + int input_channels = input_a_t.dim_size(3); + int padded_height = input_height + 2 * pad; + int padded_width = input_width + 2 * pad; + + // The size of unreachable border region on each side + int kernel_radius = (kernel_size - 1) / 2; + int border_size = max_displacement + kernel_radius; + + // Calculate the output dimensions + int output_height = ceil((float)(padded_height - border_size * 2) / (float)stride_1); + int output_width = ceil((float)(padded_width - border_size * 2) / (float)stride_1); + + OP_REQUIRES(ctx, output_height >= 1, + errors::InvalidArgument("Neighborhood and kernel don't fit in input height.")); + OP_REQUIRES(ctx, output_width >= 1, + errors::InvalidArgument("Neighborhood and kernel don't fit in input width.")); + + int neighborhood_grid_radius = max_displacement / stride_2; + int neighborhood_grid_width = 
neighborhood_grid_radius * 2 + 1; + int output_channels = neighborhood_grid_width * neighborhood_grid_width; + + // Allocate the memory for the output + Tensor *output_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output( + 0, + TensorShape({ batch_size, output_height, output_width, output_channels }), + &output_t)); + + // Get the tensors + auto input_a = input_a_t.tensor<float, 4>(); + auto input_b = input_b_t.tensor<float, 4>(); + auto output = output_t->tensor<float, 4>(); + + // Create temporary tensors for padded inputs + Tensor padded_input_a_t, padded_input_b_t; + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum<float>::value, + TensorShape({ batch_size, padded_height, padded_width, input_channels }), + &padded_input_a_t)); + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum<float>::value, + TensorShape({ batch_size, padded_height, padded_width, input_channels }), + &padded_input_b_t)); + auto padded_input_a = padded_input_a_t.tensor<float, 4>(); + auto padded_input_b = padded_input_b_t.tensor<float, 4>(); + + // Pad the inputs + Pad(ctx->eigen_device<Device>(), + input_a.data(), + batch_size, + input_height, + input_width, + input_channels, + padded_height, + padded_width, + padded_input_a.data()); + Pad(ctx->eigen_device<Device>(), + input_b.data(), + batch_size, + input_height, + input_width, + input_channels, + padded_height, + padded_width, + padded_input_b.data()); + + // Perform cross correlation + Correlation(ctx->eigen_device<Device>(), + padded_input_a.data(), + padded_input_b.data(), + batch_size, + output_height, + output_width, + output_channels, + output_height * output_width * output_channels, + padded_height, + padded_width, + input_channels, + max_displacement, + neighborhood_grid_radius, + neighborhood_grid_width, + kernel_radius, + kernel_size, + stride_1, + stride_2, + output.data()); + } + + private: + int kernel_size; + int max_displacement; + int stride_1; + int stride_2; + int pad; +}; + +REGISTER_KERNEL_BUILDER(Name("Correlation") + .Device(DEVICE_GPU), + CorrelationKernel<GPUDevice>) +} // end namespace tensorflow diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc b/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc new file mode 100644 index 0000000..c63e489 --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/correlation_kernel.cu.cc @@ -0,0 +1,153 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#define WARPS_PER_BLOCK 1 +#define THREADS_PER_WARP 32 + +#include <stdio.h> +#include <iostream> + +#include "correlation_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +__global__ void CorrelateData(int batch_size, + int out_width, + int out_height, + int out_channels, + int out_count, + int max_displacement, + int neighborhood_grid_radius, + int neighborhood_grid_width, + int kernel_radius, + int kernel_size, + int stride_1, + int stride_2, + int in_width_padded, + int in_height_padded, + int in_channels, + const float *input_a, + const float *input_b, + float *output) { + extern __shared__ char patch_data_char[]; + + float *patch_data = (float *)patch_data_char; + + // First (upper left) position of kernel upper-left corner in current center + // position of neighborhood in image 1 + int x1 = blockIdx.x * stride_1 + max_displacement; + int y1 = 
blockIdx.y * stride_1 + max_displacement; + int item = blockIdx.z; + int ch_off = threadIdx.x; + + // Load 3D patch into shared shared memory + // HEIGHT + for (int j = 0; j < kernel_size; j++) { + // WIDTH + for (int i = 0; i < kernel_size; i++) { + int ji_off = ((j * kernel_size) + i) * in_channels; + + // CHANNELS + for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) { + int idx1 = ((item * in_height_padded + y1 + j) * in_width_padded + x1 + i) * + in_channels + ch; + int idxPatchData = ji_off + ch; + patch_data[idxPatchData] = input_a[idx1]; + } + } + } + + __syncthreads(); + + __shared__ float sum[WARPS_PER_BLOCK * THREADS_PER_WARP]; + + // Compute correlation + for (int out_channel = 0; out_channel < out_channels; out_channel++) { + sum[ch_off] = 0; + + int s2o = (out_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride_2; + int s2p = (out_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride_2; + int x2 = x1 + s2o; + int y2 = y1 + s2p; + + // HEIGHT + for (int j = 0; j < kernel_size; j++) { + // WIDTH + for (int i = 0; i < kernel_size; i++) { + int ji_off = ((j * kernel_size) + i) * in_channels; + + // CHANNELS + for (int ch = ch_off; ch < in_channels; ch += (WARPS_PER_BLOCK * THREADS_PER_WARP)) { + int idxPatchData = ji_off + ch; + int idx2 = ((item * in_height_padded + y2 + j) * in_width_padded + x2 + i) * + in_channels + ch; + + sum[ch_off] += patch_data[idxPatchData] * input_b[idx2]; + } + } + } + + __syncthreads(); + + if (ch_off == 0) { + float total_sum = 0; + + for (int idx = 0; idx < WARPS_PER_BLOCK * THREADS_PER_WARP; idx++) { + total_sum += sum[idx]; + } + const int sumelems = kernel_size * kernel_size * in_channels; + const int index = (blockIdx.y * out_width + blockIdx.x) * out_channels + out_channel; + + /* from Caffe: const int index = ((out_channel * out_height + + blockIdx.y) * out_width) + blockIdx.x; */ + output[index + item * out_count] = total_sum / (float)sumelems; + + // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w) + // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k) + // n = 0 + // caffe: ((k * H + h) * W + w) + n * K * H * W + // tf: (h * W + w) * K + k + n * H * W * K + } + } +} + +void Correlation(const GPUDevice& device, + const float *input_a, + const float *input_b, + const int batch_size, + const int out_height, + const int out_width, + const int out_channels, + const int out_count, + const int in_height_padded, + const int in_width_padded, + const int in_channels, + int max_displacement, + int neighborhood_grid_radius, + int neighborhood_grid_width, + int kernel_radius, + int kernel_size, + int stride_1, + int stride_2, + float *output) { + dim3 totalBlocksCorr(out_width, out_height, batch_size); + dim3 threadsPerBlock(THREADS_PER_WARP *WARPS_PER_BLOCK); + const int shared_memory_per_block = (kernel_size * kernel_size) * in_channels; + + CorrelateData << < totalBlocksCorr, threadsPerBlock, shared_memory_per_block * sizeof(float), + device.stream() >> > ( + batch_size, out_width, out_height, out_channels, out_count, + max_displacement, neighborhood_grid_radius, neighborhood_grid_width, kernel_radius, + kernel_size, stride_1, stride_2, in_width_padded, in_height_padded, in_channels, + input_a, input_b, output); +} +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/correlation/correlation_kernel.h b/Codes/flownet2/src/ops/correlation/correlation_kernel.h new file mode 100644 index 0000000..a1dfb62 --- /dev/null +++ 
b/Codes/flownet2/src/ops/correlation/correlation_kernel.h @@ -0,0 +1,77 @@ +#ifndef FLOWNET_CORRELATION_H_ +#define FLOWNET_CORRELATION_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +void Correlation(const GPUDevice& device, + const float *input_a, + const float *input_b, + const int batch_size, + const int out_height, + const int out_width, + const int out_channels, + const int out_count, + const int in_height_padded, + const int in_width_padded, + const int in_channels, + int max_displacement, + int neighborhood_grid_radius, + int neighborhood_grid_width, + int kernel_radius, + int kernel_size, + int stride_1, + int stride_2, + float *output); + + +void CorrelationGradA(const GPUDevice& device, + const int batch_size, + const int out_width, + const int out_height, + const int out_channels, + const int max_displacement, + const int neighborhood_grid_radius, + const int neighborhood_grid_width, + const int kernel_radius, + const int stride_1, + const int stride_2, + const int in_width, + const int in_height, + const int padded_in_width, + const int padded_in_height, + const int in_channels, + const int in_count_per_sample, + const int pad, + const float *input_b, + const float *gradient, + float *output_a_gradient); + +void CorrelationGradB(const GPUDevice& device, + const int batch_size, + const int out_width, + const int out_height, + const int out_channels, + const int max_displacement, + const int neighborhood_grid_radius, + const int neighborhood_grid_width, + const int kernel_radius, + const int stride_1, + const int stride_2, + const int in_width, + const int in_height, + const int padded_in_width, + const int padded_in_height, + const int in_channels, + const int in_count_per_sample, + const int pad, + const float *input_a, + const float *gradient, + float *output_b_gradient); +} // end namespace tensorflow + +#endif // FLOWNET_CORRELATION_H_ diff --git a/Codes/flownet2/src/ops/correlation/correlation_op.cc b/Codes/flownet2/src/ops/correlation/correlation_op.cc new file mode 100644 index 0000000..4f420f0 --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/correlation_op.cc @@ -0,0 +1,83 @@ +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +Status SetOutput(InferenceContext *c) { + ShapeHandle input_a, input_b, input; + + // Get shapes of both inputs and verify they are rank 4 + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_a)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_b)); + + // Verify inputs are same dimensions + TF_RETURN_IF_ERROR(c->Merge(input_a, input_b, &input)); + + // Get the attributes + int kernel_size, max_displacement, stride_1, stride_2, pad; + TF_RETURN_IF_ERROR(c->GetAttr("kernel_size", &kernel_size)); + TF_RETURN_IF_ERROR(c->GetAttr("max_displacement", &max_displacement)); + TF_RETURN_IF_ERROR(c->GetAttr("stride_1", &stride_1)); + TF_RETURN_IF_ERROR(c->GetAttr("stride_2", &stride_2)); + TF_RETURN_IF_ERROR(c->GetAttr("pad", &pad)); + + // Get dimensions of input (already padded) + int64 batch = c->Value(c->Dim(input, 0)); + int64 input_height = c->Value(c->Dim(input, 1)); + int64 input_width = c->Value(c->Dim(input, 2)); + int64 padded_height = 
input_height + 2 * pad; + int64 padded_width = input_width + 2 * pad; + + // The size of unreachable border region on each side + int kernel_radius = (kernel_size - 1) / 2; + int border_size = max_displacement + kernel_radius; + + // Calculate the output dimensions + int64 output_height = (int64)ceil((float)(padded_height - border_size * 2) / (float)stride_1); + int64 output_width = (int64)ceil((float)(padded_width - border_size * 2) / (float)stride_1); + + // TODO: Verify output size >= 1 + + int neighborhood_grid_radius = max_displacement / stride_2; + int neighborhood_grid_width = neighborhood_grid_radius * 2 + 1; + int64 output_channels = neighborhood_grid_width * neighborhood_grid_width; + + // Set output shape + c->set_output(0, c->MakeShape({ batch, output_height, output_width, output_channels })); + return Status::OK(); +} + +REGISTER_OP("Correlation") +.Input("input_a: float32") +.Input("input_b: float32") +.Attr("kernel_size: int") +.Attr("max_displacement: int") +.Attr("stride_1: int") +.Attr("stride_2: int") +.Attr("pad: int") +.Output("output: float32") +.SetShapeFn(SetOutput); + +REGISTER_OP("CorrelationGrad") +.Input("gradients: float32") +.Input("input_a: float32") +.Input("input_b: float32") +.Attr("kernel_size: int") +.Attr("max_displacement: int") +.Attr("stride_1: int") +.Attr("stride_2: int") +.Attr("pad: int") +.Output("backprops_a: float32") +.Output("backprops_b: float32") +.SetShapeFn([](InferenceContext *c) { + // Output gradients should be the same dimensions as the inputs + ShapeHandle out; + TF_RETURN_IF_ERROR(c->Merge(c->input(1), c->input(2), &out)); + c->set_output(0, out); + c->set_output(1, out); + return Status::OK(); + }); +} // namespace tensorflow diff --git a/Codes/flownet2/src/ops/correlation/pad.cu.cc b/Codes/flownet2/src/ops/correlation/pad.cu.cc new file mode 100644 index 0000000..0b6c93d --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/pad.cu.cc @@ -0,0 +1,76 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include <stdio.h> +#include <iostream> + +#include "pad.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +__global__ void PadData( + const float *in, + int in_widthheight, + int in_width, + int in_height, + int out_width, + int out_height, + int channels, + int padding, + float *out) { + int xy = blockIdx.x * blockDim.x + threadIdx.x; + + int x = xy % in_width; + int y = xy / in_width; + int ch = blockIdx.y; + int n = blockIdx.z; + + if (xy >= in_widthheight) { + out[((n * out_height + y) * out_width + x) * channels + ch] = 0.0; + return; + } + + float value = in[((n * in_height + y) * in_width + x) * channels + ch]; + + __syncthreads(); + + int xpad = x + padding; + int ypad = y + padding; + + out[((n * out_height + ypad) * out_width + xpad) * channels + ch] = value; +} + +void Pad(const GPUDevice& device, + const float *input, + int batch_size, + int input_height, + int input_width, + int input_channels, + int output_height, + int output_width, + float *output) { + int in_widthheight = input_width * input_height; + int threads_per_block = 16; + dim3 totalBlocks((in_widthheight - 1) / threads_per_block + 1, input_channels, batch_size); + + cudaMemset(output, 0, batch_size * output_height * output_width * input_channels * sizeof(float)); + + int padding = (output_height - input_height) / 2; + + // LAUNCH KERNEL + PadData << < totalBlocks, threads_per_block, 0, device.stream() >> > ( + input, + in_widthheight, + input_width, + input_height, + output_width, + 
output_height, + input_channels, + padding, + output); +} +} +#endif // if GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/correlation/pad.h b/Codes/flownet2/src/ops/correlation/pad.h new file mode 100644 index 0000000..afb4df0 --- /dev/null +++ b/Codes/flownet2/src/ops/correlation/pad.h @@ -0,0 +1,20 @@ +#ifndef FLOWNET_PAD_H_ +#define FLOWNET_PAD_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +void Pad(const GPUDevice& device, + const float *input, + int batch_size, + int input_height, + int input_width, + int input_channels, + int output_height, + int output_width, + float *output); +} // end namespace tensorflow + +#endif // ifndef FLOWNET_PAD_H_ diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel.cc b/Codes/flownet2/src/ops/downsample/downsample_kernel.cc new file mode 100644 index 0000000..eefe247 --- /dev/null +++ b/Codes/flownet2/src/ops/downsample/downsample_kernel.cc @@ -0,0 +1,47 @@ +#define EIGEN_USE_THREADS + +#include "downsample_kernel.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +template <typename Device> +class DownsampleKernel : public OpKernel { + public: + explicit DownsampleKernel(OpKernelConstruction* ctx) : OpKernel(ctx) { + // Get the size [height, width] tensor and verify its dimensions + OP_REQUIRES_OK(ctx, ctx->GetAttr("size", &size_)); + OP_REQUIRES(ctx, size_.size() == 2, errors::InvalidArgument("size must be 2 dimensions")); + } + + void Compute(OpKernelContext* ctx) override { + // Get the input images and transforms and verify their dimensions + const Tensor& input_t = ctx->input(0); + OP_REQUIRES(ctx, input_t.dims() == 4, + errors::InvalidArgument("Input images must have rank 4")); + + // Allocate the memory for the output + Tensor* output_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output( + 0, TensorShape({input_t.dim_size(0), size_[0], size_[1], input_t.dim_size(3)}), &output_t)); + + // Perform flow augmentation + auto input = input_t.tensor<float, 4>(); + auto output = output_t->tensor<float, 4>(); + + Downsample(ctx->eigen_gpu_device(), input, output); + } + + private: + std::vector<int32> size_; +}; + +REGISTER_KERNEL_BUILDER(Name("Downsample") + .Device(DEVICE_GPU), + DownsampleKernel<GPUDevice>) +} // end namespace tensorflow diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel.h b/Codes/flownet2/src/ops/downsample/downsample_kernel.h new file mode 100644 index 0000000..bcc4e3f --- /dev/null +++ b/Codes/flownet2/src/ops/downsample/downsample_kernel.h @@ -0,0 +1,18 @@ +#ifndef FLOWNET_DOWNSAMPLE_H_ +#define FLOWNET_DOWNSAMPLE_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +bool Downsample(const GPUDevice& device, + typename TTypes<float, 4>::ConstTensor input, + typename TTypes<float, 4>::Tensor output); + +} // end namespace tensorflow + +#endif // FLOWNET_DOWNSAMPLE_H_ diff --git a/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc b/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc new file mode 100644 index 0000000..b7629a0 --- /dev/null +++ b/Codes/flownet2/src/ops/downsample/downsample_kernel_gpu.cu.cc @@ -0,0 +1,108 
@@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include <stdio.h> +#include <iostream> + +#include "downsample_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +#define CUDART_NAN_F __int_as_float(0x7fffffff) + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +__global__ void DownsampleKernel( + const int32 nthreads, + const float* input_ptr, + float* output_ptr, + const int in_width, + const int in_height, + const int out_width, + const int out_height, + const int channels, + const float width_scale, + const float height_scale, + const int wradius, + const int hradius) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + const int c = index % channels; + const int destx = (index / channels) % out_width; + const int desty = (index / channels / out_width) % out_height; + const int n = (index / channels / out_width) / out_height; + + const float srcx = ((float)destx / (float)(out_width - 1)) * (float)(in_width - 1); + const float srcy = ((float)desty / (float)(out_height - 1)) * (float)(in_height - 1); + + const int isrcx = round(srcx); + const int isrcy = round(srcy); + + float accum_value = 0; + float accum_weight = 0; + float accum_nan = 0; + + for (int dy = -hradius; dy <= hradius; dy++) { + int yoff = isrcy + dy; + // + for (int dx = -wradius; dx <= wradius; dx++) { + int xoff = isrcx + dx; + + if (xoff >= 0 && yoff >= 0 && xoff < in_width && yoff < in_height) { + int idx = ((n * in_height + yoff) * in_width + xoff) * channels + c; + float sample = input_ptr[idx]; + float weight = fmaxf(0.0f, 1.0f - (fabsf((float)xoff - srcx) / width_scale)) + * fmaxf(0.0f, 1.0f - (fabsf((float)yoff - srcy) / height_scale)); + if (sample != sample) { // isnan + accum_nan += weight; + sample = 0; + weight = 0; + } + accum_value += sample * weight; + accum_weight += weight; + } + } + } + + if (accum_nan / accum_weight > 0.5) { + output_ptr[index] = CUDART_NAN_F; + } else { + output_ptr[index] = accum_value / accum_weight; + } + } +} + +bool Downsample(const GPUDevice& device, + typename TTypes<float, 4>::ConstTensor input, + typename TTypes<float, 4>::Tensor output) { + const int batch_size = output.dimension(0); + const int out_height = output.dimension(1); + const int out_width = output.dimension(2); + const int out_channels = output.dimension(3); + const int total_count = batch_size * out_height * out_width * out_channels; + + const int in_height = input.dimension(1); + const int in_width = input.dimension(2); + + const float width_scale = (float)(in_width - 1) / (float)(out_width - 1); + const float height_scale = (float)(in_height - 1) / (float)(out_height - 1); + + const int wradius = ceil(width_scale); + const int hradius = ceil(height_scale); + + CudaLaunchConfig config = GetCudaLaunchConfig(total_count, device); + DownsampleKernel<<<config.block_count, config.thread_per_block, 0, + device.stream()>>>(total_count, input.data(), output.data(), + in_width, in_height, out_width, out_height, out_channels, + width_scale, height_scale, wradius, hradius); + return device.ok(); +} + +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/downsample/downsample_op.cc b/Codes/flownet2/src/ops/downsample/downsample_op.cc new file mode 100644 index 0000000..6980dc7 --- /dev/null +++ b/Codes/flownet2/src/ops/downsample/downsample_op.cc @@ -0,0 +1,30 @@ 
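+// Registers the "Downsample" op and its shape function: the output keeps the input's
+// batch and channel dimensions and takes its height and width from the "size" attribute.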
+#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; +using shape_inference::DimensionHandle; + +Status SetOutputToSizedImage(InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); + DimensionHandle batch = c->Dim(input, 0); + DimensionHandle depth = c->Dim(input, 3); + std::vector<int32> size_; + c->GetAttr("size", &size_); + DimensionHandle height = c->MakeDim(size_[0]); + DimensionHandle width = c->MakeDim(size_[1]); + c->set_output(0, c->MakeShape({batch, height, width, depth})); + return Status::OK(); +} + +REGISTER_OP("Downsample") + .Input("input: float32") + .Attr("size: list(int) >= 2") + .Output("output: float32") + .SetShapeFn(SetOutputToSizedImage); + +} // namespace tensorflow diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp.cc new file mode 100644 index 0000000..b5d9602 --- /dev/null +++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.cc @@ -0,0 +1,48 @@ +#define EIGEN_USE_THREADS + +#include "flow_warp.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +template<typename Device> +class FlowWarpKernel : public OpKernel { + public: + explicit FlowWarpKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext *ctx) override { + // Get the input image and flow and verify dimensions + const Tensor& input_t = ctx->input(0); + const Tensor& flow_t = ctx->input(1); + + OP_REQUIRES(ctx, input_t.dims() == 4, + errors::InvalidArgument("Input image must have rank 4")); + OP_REQUIRES(ctx, flow_t.dims() == 4, + errors::InvalidArgument("Input flow must have rank 4")); + OP_REQUIRES(ctx, + input_t.dim_size(0) == flow_t.dim_size(0) && input_t.dim_size( + 1) == flow_t.dim_size(1) && input_t.dim_size(2) == flow_t.dim_size(2), + errors::InvalidArgument( + "Input image and flow must have same N x H x W dimensions")); + + // Allocate the memory for the output + Tensor *output_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input_t.shape(), &output_t)); + + // Perform flow augmentation + auto input = input_t.tensor<float, 4>(); + auto flow = flow_t.tensor<float, 4>(); + auto output = output_t->tensor<float, 4>(); + + FlowWarp(ctx->eigen_gpu_device(), input, flow, output); + } +}; + +REGISTER_KERNEL_BUILDER(Name("FlowWarp") + .Device(DEVICE_GPU), + FlowWarpKernel<GPUDevice>) +} // end namespace tensorflow diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc new file mode 100644 index 0000000..2007151 --- /dev/null +++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.cu.cc @@ -0,0 +1,130 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include <stdio.h> +#include <iostream> + +#include "flow_warp.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +#define RA_TILE 32 +#define RA_ROWS 8 + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +__global__ void FlowWarpKernel( + const float 
*image, + const float *flow, + float *warped, + const int batch_size, + const int channels, + const int cblocks, + const int width, + const int wblocks, + const int height, + const int width_height) { + int y = blockIdx.y; + int n = blockIdx.z; + + __shared__ float x2_buf[FW_TILE_X], y2_buf[FW_TILE_X]; + __shared__ float buffer[FW_TILE_C][FW_TILE_X + 1]; + + int x; + int c; + + x = blockIdx.x * FW_TILE_X + threadIdx.x; + + if ((threadIdx.y == 0) && (x < width)) { + const int idx = ((n * height + y) * width + x) * 2; + x2_buf[threadIdx.x] = float(x) + flow[idx]; + y2_buf[threadIdx.x] = float(y) + flow[idx + 1]; + } + + __syncthreads(); + + float x2 = x2_buf[threadIdx.y]; + float y2 = y2_buf[threadIdx.y]; + + int ix2_L = int(x2); + int iy2_T = int(y2); + int ix2_R = min(ix2_L + 1, width - 1); + int iy2_B = min(iy2_T + 1, height - 1); + + int off_TL = ((n * height + iy2_T) * width + ix2_L) * channels; + int off_TR = ((n * height + iy2_T) * width + ix2_R) * channels; + int off_BL = ((n * height + iy2_B) * width + ix2_L) * channels; + int off_BR = ((n * height + iy2_B) * width + ix2_R) * channels; + + float alpha = x2 - ix2_L; + float beta = y2 - iy2_T; + float coeffTL = (1 - alpha) * (1 - beta); + float coeffTR = alpha * (1 - beta); + float coeffBL = (1 - alpha) * beta; + float coeffBR = alpha * beta; + + for (int cb = 0; cb < cblocks; cb++) { + __syncthreads(); + + buffer[threadIdx.y][threadIdx.x] = 0.0; + + __syncthreads(); + + c = cb * FW_TILE_C + threadIdx.x; + + if ((x2 >= 0) && (y2 >= 0) && (x2 < width) && (y2 < height) && (c < channels)) { + buffer[threadIdx.y][threadIdx.x] = // buffer [x][c] + coeffTL * image[off_TL + c] + + coeffTR * image[off_TR + c] + + coeffBL * image[off_BL + c] + + coeffBR * image[off_BR + c]; + } + + __syncthreads(); + + c = cb * FW_TILE_C + threadIdx.y; + x = blockIdx.x * FW_TILE_X + threadIdx.x; + + if ((c < channels) && (x < width)) { + warped[((n * height + y) * width + x) * channels + c] = buffer[threadIdx.x][threadIdx.y]; + } + } +} + +void FlowWarp(const GPUDevice& device, + typename TTypes<float, 4>::ConstTensor input, + typename TTypes<float, 4>::ConstTensor flow, + typename TTypes<float, 4>::Tensor output) { + const int batch_size = input.dimension(0); + const int height = input.dimension(1); + const int width = input.dimension(2); + const int channels = input.dimension(3); + + const int width_height = width * height; + int wblocks = ((width - 1) / FW_TILE_X + 1); + int cblocks = ((channels - 1) / FW_TILE_C + 1); + dim3 warpThreads(FW_TILE_X, FW_TILE_C); + dim3 warpBlocks(wblocks, height, batch_size); + + cudaMemset(output.data(), 0, batch_size * height * width * 2 * sizeof(float)); + + FlowWarpKernel << < warpBlocks, warpThreads, 0, device.stream() >> > ( + input.data(), + flow.data(), + output.data(), + batch_size, + channels, + cblocks, + width, + wblocks, + height, + width_height); +} +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp.h b/Codes/flownet2/src/ops/flow_warp/flow_warp.h new file mode 100644 index 0000000..2780316 --- /dev/null +++ b/Codes/flownet2/src/ops/flow_warp/flow_warp.h @@ -0,0 +1,28 @@ +#ifndef FLOWNET_FLOWWARP_H_ +#define FLOWNET_FLOWWARP_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" + +#define FW_THREADS 32 +#define FW_TILE_X FW_THREADS +#define FW_TILE_C FW_THREADS + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +void 
FlowWarp(const GPUDevice& device, + typename TTypes<float, 4>::ConstTensor input, + typename TTypes<float, 4>::ConstTensor flow, + typename TTypes<float, 4>::Tensor output); + +void FlowWarpGrad(const GPUDevice& device, + typename TTypes<float, 4>::ConstTensor image, + typename TTypes<float, 4>::ConstTensor flow, + typename TTypes<float, 4>::ConstTensor gradient, + typename TTypes<float, 4>::Tensor image_grad, + typename TTypes<float, 4>::Tensor flow_grad); +} // end namespace tensorflow + +#endif // FLOWNET_FLOWWARP_H_ diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc new file mode 100644 index 0000000..9f3e7ea --- /dev/null +++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cc @@ -0,0 +1,57 @@ +#define EIGEN_USE_THREADS + +#include "flow_warp.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +template<typename Device> +class FlowWarpGradKernel : public OpKernel { + public: + explicit FlowWarpGradKernel(OpKernelConstruction *ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext *ctx) override { + // Get the input image and flow and verify dimensions + const Tensor& image_t = ctx->input(0); + const Tensor& flow_t = ctx->input(1); + const Tensor& grad_t = ctx->input(2); + + OP_REQUIRES(ctx, image_t.dims() == 4, + errors::InvalidArgument("Input image must have rank 4")); + OP_REQUIRES(ctx, flow_t.dims() == 4, + errors::InvalidArgument("Input flow must have rank 4")); + OP_REQUIRES(ctx, + image_t.dim_size(0) == flow_t.dim_size(0) && image_t.dim_size( + 1) == flow_t.dim_size(1) && image_t.dim_size(2) == flow_t.dim_size(2), + errors::InvalidArgument( + "Input image and flow must have same N x H x W dimensions")); + + // Allocate the memory for the output + Tensor *image_grad_t; + Tensor *flow_grad_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, image_t.shape(), &image_grad_t)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, flow_t.shape(), &flow_grad_t)); + + auto image = image_t.tensor<float, 4>(); + auto flow = flow_t.tensor<float, 4>(); + auto gradient = grad_t.tensor<float, 4>(); + auto image_grad = image_grad_t->tensor<float, 4>(); + auto flow_grad = flow_grad_t->tensor<float, 4>(); + + FlowWarpGrad(ctx->eigen_gpu_device(), + image, + flow, + gradient, + image_grad, + flow_grad); + } +}; + +REGISTER_KERNEL_BUILDER(Name("FlowWarpGrad") + .Device(DEVICE_GPU), + FlowWarpGradKernel<GPUDevice>) +} // end namespace tensorflow diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc new file mode 100644 index 0000000..25248c8 --- /dev/null +++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_grad.cu.cc @@ -0,0 +1,126 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "flow_warp.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +__global__ void FlowWarpGradKernel( + const float *image, + float *image_grad, + const float *flow, + float *flow_grad, + const float *gradient, + int batch_size, + int channels, + int cblocks, + int width, + int wblocks, + int height, + int widthheight) { + int x = blockIdx.x * FW_TILE_X + threadIdx.x; + + if (x >= width) return; + + int y = blockIdx.y; + int n = blockIdx.z; + + const int flow_idx = ((n * height + y) * width + x) * 2; + float x2 = float(x) + flow[flow_idx]; + float y2 = float(y) + 
flow[flow_idx + 1]; + + if ((x2 >= 0.f) && (y2 >= 0.f) && (x2 < width) && (y2 < height)) { + int ix2_L = int(x2); + int iy2_T = int(y2); + int ix2_R = min(ix2_L + 1, width - 1); + int iy2_B = min(iy2_T + 1, height - 1); + + float alpha = x2 - ix2_L; + float beta = y2 - iy2_T; + + for (int c = 0; c < channels; c++) { + float warped_diff_value = gradient[((n * height + y) * width + x) * channels + c]; + atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_L) * channels + c], + warped_diff_value * (1 - alpha) * (1 - beta)); + atomicAdd(&image_grad[((n * height + iy2_T) * width + ix2_R) * channels + c], + warped_diff_value * alpha * (1 - beta)); + atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_L) * channels + c], + warped_diff_value * (1 - alpha) * beta); + atomicAdd(&image_grad[((n * height + iy2_B) * width + ix2_R) * channels + c], + warped_diff_value * alpha * beta); + } + + float gamma = iy2_B - y2; + float bot_diff = 0; + + for (int c = 0; c < channels; c++) { + int ch_off = (n * channels + c) * height; + float temp = 0; + temp += gamma * + (image[((n * height + iy2_T) * width + ix2_R) * channels + c] - + image[((n * height + iy2_T) * width + ix2_L) * channels + c]); + temp += (1 - gamma) * + (image[((n * height + iy2_B) * width + ix2_R) * channels + c] - + image[((n * height + iy2_B) * width + ix2_L) * channels + c]); + + bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp; + } + flow_grad[((n * height + y) * width + x) * 2] = bot_diff; + + gamma = ix2_R - x2; + bot_diff = 0; + + for (int c = 0; c < channels; c++) { + float temp = 0; + temp += gamma * + (image[((n * height + iy2_B) * width + ix2_L) * channels + c] - + image[((n * height + iy2_T) * width + ix2_L) * channels + c]); + temp += (1 - gamma) * + (image[((n * height + iy2_B) * width + ix2_R) * channels + c] - + image[((n * height + iy2_T) * width + ix2_R) * channels + c]); + + bot_diff += gradient[((n * height + y) * width + x) * channels + c] * temp; + } + flow_grad[((n * height + y) * width + x) * 2 + 1] = bot_diff; + } +} + +void FlowWarpGrad(const GPUDevice& device, + typename TTypes<float, 4>::ConstTensor image, + typename TTypes<float, 4>::ConstTensor flow, + typename TTypes<float, 4>::ConstTensor gradient, + typename TTypes<float, 4>::Tensor image_grad, + typename TTypes<float, 4>::Tensor flow_grad) { + const int batch_size = image.dimension(0); + const int height = image.dimension(1); + const int width = image.dimension(2); + const int channels = image.dimension(3); + const int width_height = width * height; + + int wblocks = ((width - 1) / FW_TILE_X + 1); + int cblocks = ((channels - 1) / FW_TILE_C + 1); + dim3 warpThreads(FW_TILE_X, 1); + dim3 warpBlocks(wblocks, height, batch_size); + + cudaMemset(image_grad.data(), 0, batch_size * height * width * channels * sizeof(float)); + cudaMemset(flow_grad.data(), 0, batch_size * height * width * 2 * sizeof(float)); + + FlowWarpGradKernel << < warpBlocks, warpThreads, 0, device.stream() >> > ( + image.data(), + image_grad.data(), + flow.data(), + flow_grad.data(), + gradient.data(), + batch_size, + channels, + cblocks, + width, + wblocks, + height, + width_height); +} +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc b/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc new file mode 100644 index 0000000..aef9c74 --- /dev/null +++ b/Codes/flownet2/src/ops/flow_warp/flow_warp_op.cc @@ -0,0 +1,23 @@ +#include "tensorflow/core/framework/common_shape_fns.h" +#include 
"tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +REGISTER_OP("FlowWarp") +.Input("image: float32") +.Input("flow: float32") +.Output("output: float32") +.SetShapeFn(::tensorflow::shape_inference::UnchangedShape); + +REGISTER_OP("FlowWarpGrad") +.Input("image: float32") +.Input("flow: float32") +.Input("gradient: float32") +.Output("image_grad: float32") +.Output("flow_grad: float32") +.SetShapeFn([](shape_inference::InferenceContext *c) { + c->set_output(0, c->input(0)); + c->set_output(1, c->input(1)); + return Status::OK(); + }); +} // namespace tensorflow diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc new file mode 100644 index 0000000..b93dfa6 --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.cc @@ -0,0 +1,420 @@ +#include "augmentation_base.h" + +#include <math.h> +#include <random> + +namespace tensorflow { +/** TransMat Functions **/ +void AugmentationLayerBase::TransMat::fromCoeff(AugmentationCoeff *coeff, + int out_width, + int out_height, + int src_width, + int src_height) { + leftMultiply(1, 0, -0.5 * out_width, + 0, 1, -0.5 * out_height); + + if (coeff->angle) { + leftMultiply(cos(coeff->angle()), -sin(coeff->angle()), 0, + sin(coeff->angle()), cos(coeff->angle()), 0); + } + + if (coeff->dx || coeff->dy) { + leftMultiply(1, 0, coeff->dx() * out_width, + 0, 1, coeff->dy() * out_height); + } + + if (coeff->zoom_x || coeff->zoom_y) { + leftMultiply(1.0 / coeff->zoom_x(), 0, 0, + 0, 1.0 / coeff->zoom_y(), 0); + } + + leftMultiply(1, 0, 0.5 * src_width, + 0, 1, 0.5 * src_height); +} + +void AugmentationLayerBase::TransMat::fromTensor(const float *tensor_data) { + t0 = tensor_data[0]; + t1 = tensor_data[1]; + t2 = tensor_data[2]; + t3 = tensor_data[3]; + t4 = tensor_data[4]; + t5 = tensor_data[5]; +} + +AugmentationLayerBase::TransMat AugmentationLayerBase::TransMat::inverse() { + float a = this->t0, b = this->t1, c = this->t2; + float d = this->t3, e = this->t4, f = this->t5; + + float denom = a * e - b * d; + + TransMat result; + + result.t0 = e / denom; + result.t1 = b / -denom; + result.t2 = (c * e - b * f) / -denom; + result.t3 = d / -denom; + result.t4 = a / denom; + result.t5 = (c * d - a * f) / denom; + + return result; +} + +void AugmentationLayerBase::TransMat::leftMultiply(float u0, + float u1, + float u2, + float u3, + float u4, + float u5) { + float t0 = this->t0, t1 = this->t1, t2 = this->t2; + float t3 = this->t3, t4 = this->t4, t5 = this->t5; + + this->t0 = t0 * u0 + t3 * u1; + this->t1 = t1 * u0 + t4 * u1; + this->t2 = t2 * u0 + t5 * u1 + u2; + this->t3 = t0 * u3 + t3 * u4; + this->t4 = t1 * u3 + t4 * u4; + this->t5 = t2 * u3 + t5 * u4 + u5; +} + +void AugmentationLayerBase::TransMat::toIdentity() { + t0 = 1; t1 = 0; t2 = 0; + t3 = 0; t4 = 1; t5 = 0; +} + +/** AugmentationCoeff Functions **/ +void AugmentationCoeff::clear() { + // Spatial variables + dx.clear(); + dy.clear(); + angle.clear(); + zoom_x.clear(); + zoom_y.clear(); + + // Chromatic variables + gamma.clear(); + brightness.clear(); + contrast.clear(); + color1.clear(); + color2.clear(); + color3.clear(); +} + +void AugmentationCoeff::combine_with(const AugmentationCoeff& coeff) { + // Spatial types + if (coeff.dx) { + dx = dx() * coeff.dx(); + } + + if (coeff.dy) { + dy = dy() * coeff.dy(); + } + + if (coeff.angle) { + angle = angle() * coeff.angle(); + } + + if (coeff.zoom_x) { + zoom_x = zoom_x() * 
coeff.zoom_x(); + } + + if (coeff.zoom_y) { + zoom_y = zoom_y() * coeff.zoom_y(); + } + + // Chromatic types + if (coeff.gamma) { + gamma = gamma() * coeff.gamma(); + } + + if (coeff.brightness) { + brightness = brightness() * coeff.brightness(); + } + + if (coeff.contrast) { + contrast = contrast() * coeff.contrast(); + } + + if (coeff.color1) { + color1 = color1() * coeff.color1(); + } + + if (coeff.color2) { + color2 = color2() * coeff.color2(); + } + + if (coeff.color3) { + color3 = color3() * coeff.color3(); + } +} + +void AugmentationCoeff::replace_with(const AugmentationCoeff& coeff) { + // Spatial types + if (coeff.dx) { + dx = coeff.dx(); + } + + if (coeff.dy) { + dy = coeff.dy(); + } + + if (coeff.angle) { + angle = coeff.angle(); + } + + if (coeff.zoom_x) { + zoom_x = coeff.zoom_x(); + } + + if (coeff.zoom_y) { + zoom_y = coeff.zoom_y(); + } + + // Chromatic types + if (coeff.gamma) { + gamma = gamma() * coeff.gamma(); + } + + if (coeff.brightness) { + brightness = coeff.brightness(); + } + + if (coeff.contrast) { + contrast = coeff.contrast(); + } + + if (coeff.color1) { + color1 = coeff.color1(); + } + + if (coeff.color2) { + color2 = coeff.color2(); + } + + if (coeff.color3) { + color3 = coeff.color3(); + } +} + +/** AugmentationLayerBase Functions **/ +float AugmentationLayerBase::rng_generate(const AugmentationParam& param, + float discount_coeff, + const float default_value) { + std::random_device rd; // Will be used to obtain a seed for the random number + // engine + std::mt19937 gen(rd()); // Standard mersenne_twister_engine seeded with rd() + + float spread = param.spread * discount_coeff; + + if (param.rand_type == "uniform_bernoulli") { + float tmp1 = 0.0; + bool tmp2 = false; + + if (param.prob > 0.0) { + std::bernoulli_distribution bernoulli(param.prob); + tmp2 = bernoulli(gen); + } + + if (!tmp2) { + return default_value; + } + + if (param.spread > 0.0) { + std::uniform_real_distribution<> uniform(param.mean - spread, + param.mean + spread); + tmp1 = uniform(gen); + } else { + tmp1 = param.mean; + } + + if (param.should_exp) { + tmp1 = exp(tmp1); + } + + return tmp1; + } else if (param.rand_type == "gaussian_bernoulli") { + float tmp1 = 0.0; + bool tmp2 = false; + + if (param.prob > 0.0) { + std::bernoulli_distribution bernoulli(param.prob); + tmp2 = bernoulli(gen); + } + + if (!tmp2) { + return default_value; + } + + if (spread > 0.0) { + std::normal_distribution<> normal(param.mean, spread); + tmp1 = normal(gen); + } else { + tmp1 = param.mean; + } + + if (param.should_exp) { + tmp1 = exp(tmp1); + } + + return tmp1; + } else { + throw "Unknown random type: " + param.rand_type; + } +} + +void AugmentationLayerBase::generate_chromatic_coeffs(float discount_coeff, + const AugmentationParams& aug, + AugmentationCoeff & coeff) { + if (aug.gamma) { + coeff.gamma = rng_generate(aug.gamma(), discount_coeff, coeff.gamma.get_default()); + } + + if (aug.brightness) { + coeff.brightness = + rng_generate(aug.brightness(), discount_coeff, coeff.brightness.get_default()); + } + + if (aug.contrast) { + coeff.contrast = rng_generate(aug.contrast(), discount_coeff, coeff.contrast.get_default()); + } + + if (aug.color) { + coeff.color1 = rng_generate(aug.color(), discount_coeff, coeff.color1.get_default()); + coeff.color2 = rng_generate(aug.color(), discount_coeff, coeff.color2.get_default()); + coeff.color3 = rng_generate(aug.color(), discount_coeff, coeff.color3.get_default()); + } +} + +void AugmentationLayerBase::generate_spatial_coeffs(float discount_coeff, + const 
AugmentationParams& aug, + AugmentationCoeff & coeff) { + if (aug.translate) { + coeff.dx = rng_generate(aug.translate(), discount_coeff, coeff.dx.get_default()); + coeff.dy = rng_generate(aug.translate(), discount_coeff, coeff.dy.get_default()); + } + + if (aug.rotate) { + coeff.angle = rng_generate(aug.rotate(), discount_coeff, coeff.angle.get_default()); + } + + if (aug.zoom) { + coeff.zoom_x = rng_generate(aug.zoom(), discount_coeff, coeff.zoom_x.get_default()); + coeff.zoom_y = coeff.zoom_x(); + } + + if (aug.squeeze) { + float squeeze_coeff = rng_generate(aug.squeeze(), discount_coeff, 1.0); + coeff.zoom_x = coeff.zoom_x() * squeeze_coeff; + coeff.zoom_y = coeff.zoom_y() * squeeze_coeff; + } +} + +void AugmentationLayerBase::generate_valid_spatial_coeffs( + float discount_coeff, + const AugmentationParams& aug, + AugmentationCoeff & coeff, + int src_width, + int src_height, + int out_width, + int out_height) { + int x, y; + float x1, y1, x2, y2; + int counter = 0; + int good_params = 0; + AugmentationCoeff incoming_coeff(coeff); + + while (good_params < 4 && counter < 50) { + coeff.clear(); + AugmentationLayerBase::generate_spatial_coeffs(discount_coeff, aug, coeff); + coeff.combine_with(incoming_coeff); + + // Check if all 4 corners of the transformed image fit into the original + // image + good_params = 0; + + for (x = 0; x < out_width; x += out_width - 1) { + for (y = 0; y < out_height; y += out_height - 1) { + // move the origin + x1 = x - 0.5 * out_width; + y1 = y - 0.5 * out_height; + + // rotate + x2 = cos(coeff.angle()) * x1 - sin(coeff.angle()) * y1; + y2 = sin(coeff.angle()) * x1 + sin(coeff.angle()) * y1; + + // translate + x2 = x2 + coeff.dx() * out_width; + y2 = y2 + coeff.dy() * out_height; + + // zoom + x2 = x2 / coeff.zoom_x(); + y2 = y2 / coeff.zoom_y(); + + // move the origin back + x2 = x2 + 0.5 * src_width; + y2 = y2 + 0.5 * src_height; + + if (!((floor(x2) < 0) || (floor(x2) > src_width - 2.0) || + (floor(y2) < 0) || (floor(y2) > src_height - 2.0))) { + good_params++; + } + } + } + counter++; + } + + if (counter >= 50) { + printf("Warning: No suitable spatial transformation after %d attempts.\n", counter); + coeff.clear(); + coeff.replace_with(incoming_coeff); + } +} + +void AugmentationLayerBase::copy_chromatic_coeffs_to_tensor( + const std::vector<AugmentationCoeff>& coeff_arr, + typename TTypes<float, 2>::Tensor& out) +{ + float *out_ptr = out.data(); + int counter = 0; + + for (AugmentationCoeff coeff : coeff_arr) { + out_ptr[counter + 0] = coeff.gamma(); + out_ptr[counter + 1] = coeff.brightness(); + out_ptr[counter + 2] = coeff.contrast(); + out_ptr[counter + 3] = coeff.color1(); + out_ptr[counter + 4] = coeff.color2(); + out_ptr[counter + 5] = coeff.color3(); + counter += 6; + } +} + +void AugmentationLayerBase::copy_spatial_coeffs_to_tensor( + const std::vector<AugmentationCoeff>& coeff_arr, + const int out_width, + const int out_height, + const int src_width, + const int src_height, + typename TTypes<float, 2>::Tensor& out, + const bool invert) +{ + float *out_ptr = out.data(); + int counter = 0; + TransMat t; + + for (AugmentationCoeff coeff : coeff_arr) { + t.toIdentity(); + t.fromCoeff(&coeff, out_width, out_height, src_width, src_height); + + if (invert) { + t = t.inverse(); + } + + out_ptr[counter + 0] = t.t0; + out_ptr[counter + 1] = t.t1; + out_ptr[counter + 2] = t.t2; + out_ptr[counter + 3] = t.t3; + out_ptr[counter + 4] = t.t4; + out_ptr[counter + 5] = t.t5; + counter += 6; + } +} +} diff --git 
a/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h new file mode 100644 index 0000000..d2aba2c --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/augmentation_base.h @@ -0,0 +1,228 @@ +#ifndef AUGMENTATION_LAYER_BASE_H_ +#define AUGMENTATION_LAYER_BASE_H_ + +#include "tensorflow/core/framework/tensor_types.h" + +#include <iostream> +#include <string> +#include <vector> + +namespace tensorflow { +template<typename T> +class OptionalType { + public: + OptionalType(const T default_value) : default_value(default_value), has_value(false) {} + + operator bool() const { + return has_value; + } + + OptionalType& operator=(T val) { + has_value = true; + value = val; + return *this; + } + + const T operator()() const { + return has_value ? value : default_value; + } + + void clear() { + has_value = false; + } + + const T get_default() { + return default_value; + } + + private: + T value; + bool has_value; + const T default_value; +}; + +class AugmentationCoeff { + public: + // Spatial Types + OptionalType<float>dx; + OptionalType<float>dy; + OptionalType<float>angle; + OptionalType<float>zoom_x; + OptionalType<float>zoom_y; + + // Chromatic Types + OptionalType<float>gamma; + OptionalType<float>brightness; + OptionalType<float>contrast; + OptionalType<float>color1; + OptionalType<float>color2; + OptionalType<float>color3; + + AugmentationCoeff() : dx(0.0), dy(0.0), angle(0.0), zoom_x(1.0), zoom_y(1.0), gamma(1.0), + brightness(0.0), contrast(1.0), color1(1.0), color2(1.0), color3(1.0) {} + + AugmentationCoeff(const AugmentationCoeff& coeff) : AugmentationCoeff() { + replace_with(coeff); + } + + void clear(); + + void combine_with(const AugmentationCoeff& coeff); + + void replace_with(const AugmentationCoeff& coeff); +}; + +typedef struct AugmentationParam { + std::string rand_type; + bool should_exp; + float mean; + float spread; + float prob; +} AugmentationParam; + +class AugmentationParams { + public: + int crop_height; + int crop_width; + + // Spatial options + OptionalType<struct AugmentationParam>translate; + OptionalType<struct AugmentationParam>rotate; + OptionalType<struct AugmentationParam>zoom; + OptionalType<struct AugmentationParam>squeeze; + + // Chromatic options + OptionalType<struct AugmentationParam>gamma; + OptionalType<struct AugmentationParam>brightness; + OptionalType<struct AugmentationParam>contrast; + OptionalType<struct AugmentationParam>color; + + inline AugmentationParams(int crop_height, + int crop_width, + std::vector<std::string>params_name, + std::vector<std::string>params_rand_type, + std::vector<bool> params_exp, + std::vector<float> params_mean, + std::vector<float> params_spread, + std::vector<float> params_prob) : + crop_height(crop_height), + crop_width(crop_width), + translate(AugmentationParam()), + rotate(AugmentationParam()), + zoom(AugmentationParam()), + squeeze(AugmentationParam()), + gamma(AugmentationParam()), + brightness(AugmentationParam()), + contrast(AugmentationParam()), + color(AugmentationParam()) { + for (int i = 0; i < params_name.size(); i++) { + const std::string name = params_name[i]; + const std::string rand_type = params_rand_type[i]; + const bool should_exp = params_exp[i]; + const float mean = params_mean[i]; + const float spread = params_spread[i]; + const float prob = params_prob[i]; + + struct AugmentationParam param = { rand_type, should_exp, mean, spread, prob }; + + if (name == "translate") { + this->translate = param; + } else if 
(name == "rotate") { + this->rotate = param; + } else if (name == "zoom") { + this->zoom = param; + } else if (name == "squeeze") { + this->squeeze = param; + } else if (name == "noise") { + // NoOp: We handle noise on the Python side + } else if (name == "gamma") { + this->gamma = param; + } else if (name == "brightness") { + this->brightness = param; + } else if (name == "contrast") { + this->contrast = param; + } else if (name == "color") { + this->color = param; + } else { + std::cout << "Ignoring unknown augmentation parameter: " << name << std::endl; + } + } + } + + bool should_do_spatial_transform() { + return this->translate || this->rotate || this->zoom || this->squeeze; + } + + bool should_do_chromatic_transform() { + return this->gamma || this->brightness || this->contrast || this->color; + } +}; + +class AugmentationLayerBase { + public: + class TransMat { + /** + * Translation matrix class for spatial augmentation + * | 0 1 2 | + * | 3 4 5 | + */ + + public: + float t0, t1, t2; + float t3, t4, t5; + + + void fromCoeff(AugmentationCoeff *coeff, + int out_width, + int out_height, + int src_width, + int src_height); + + void fromTensor(const float *tensor_data); + + TransMat inverse(); + + void leftMultiply(float u0, + float u1, + float u2, + float u3, + float u4, + float u5); + + void toIdentity(); + }; + + // TODO: Class ChromaticCoeffs + + static float rng_generate(const AugmentationParam& param, + float discount_coeff, + const float default_value); + + static void clear_spatial_coeffs(AugmentationCoeff& coeff); + static void generate_chromatic_coeffs(float discount_coeff, + const AugmentationParams& aug, + AugmentationCoeff & coeff); + static void generate_spatial_coeffs(float discount_coeff, + const AugmentationParams& aug, + AugmentationCoeff & coeff); + static void generate_valid_spatial_coeffs(float discount_coeff, + const AugmentationParams& aug, + AugmentationCoeff & coeff, + int src_width, + int src_height, + int out_width, + int out_height); + + static void copy_chromatic_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr, + typename TTypes<float, 2>::Tensor& out); + static void copy_spatial_coeffs_to_tensor(const std::vector<AugmentationCoeff>& coeff_arr, + const int out_width, + const int out_height, + const int src_width, + const int src_height, + typename TTypes<float, 2>::Tensor& out, + const bool invert = false); +}; +} // namespace tensorflow + +#endif // AUGMENTATION_LAYER_BASE_H_ diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc new file mode 100644 index 0000000..77b8c83 --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cc @@ -0,0 +1,461 @@ +#define EIGEN_USE_THREADS + +#include <algorithm> +#include <iostream> +#include <random> +#include <vector> + +#include "augmentation_base.h" +#include "data_augmentation.h" +#include "tensorflow/core/framework/op_kernel.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/logging.h" + +#include "tensorflow/core/util/work_sharder.h" + +namespace tensorflow { +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +inline float clamp(float f, float a, float b) { + return 
fmaxf(a, fminf(f, b)); +} + +template<> +void Augment(OpKernelContext *context, + const CPUDevice& d, + const int batch_size, + const int channels, + const int src_width, + const int src_height, + const int src_count, + const int out_width, + const int out_height, + const float *src_data, + float *out_data, + const float *transMats, + float *chromatic_coeffs) { + const int64 channel_count = batch_size * out_height * out_width; + const int kCostPerChannel = 10; + const DeviceBase::CpuWorkerThreads& worker_threads = + *context->device()->tensorflow_cpu_worker_threads(); + + Shard(worker_threads.num_threads, + worker_threads.workers, + channel_count, + kCostPerChannel, + [batch_size, channels, src_width, + src_height, src_count, out_width, out_height, src_data, + out_data, transMats, chromatic_coeffs]( + int64 start_channel, int64 end_channel) { + // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k) + for (int index = start_channel; index < end_channel; index++) { + int x = index % out_width; + int y = (index / out_width) % out_height; + int n = index / out_width / out_height; + + const float *transMat = transMats + n * 6; + + float gamma, brightness, contrast; + + if (chromatic_coeffs) { + gamma = chromatic_coeffs[n * 6 + 0]; + brightness = chromatic_coeffs[n * 6 + 1]; + contrast = chromatic_coeffs[n * 6 + 2]; + } + + float xpos = x * transMat[0] + y * transMat[1] + transMat[2]; + float ypos = x * transMat[3] + y * transMat[4] + transMat[5]; + + xpos = clamp(xpos, 0.0f, (float)(src_width) - 1.05f); + ypos = clamp(ypos, 0.0f, (float)(src_height) - 1.05f); + + float tlx = floor(xpos); + float tly = floor(ypos); + + float xdist = xpos - tlx; + float ydist = ypos - tly; + + int srcTLIdxOffset = ((n * src_height + (int)tly) * src_width + (int)tlx) * channels; + + // ((n * src_height + tly) * src_width + (tlx + 1)) * channels + int srcTRIdxOffset = srcTLIdxOffset + channels; + + // ((n * src_height + (tly + 1)) * src_width + tlx) * channels + int srcBLIdxOffset = srcTLIdxOffset + channels * src_width; + + // ((n * src_height + (tly + 1)) * src_width + (tlx + 1)) * channels + int srcBRIdxOffset = srcTLIdxOffset + channels + channels * src_width; + + // Variables for chromatic transform + int data_index[3]; + float rgb[3]; + float mean_in = 0; + float mean_out = 0; + + for (int c = 0; c < channels; c++) { + // Bilinear interpolation + int srcTLIdx = srcTLIdxOffset + c; + int srcTRIdx = std::min(srcTRIdxOffset + c, src_count); + int srcBLIdx = std::min(srcBLIdxOffset + c, src_count); + int srcBRIdx = std::min(srcBRIdxOffset + c, src_count); + + float dest = (1 - xdist) * (1 - ydist) * src_data[srcTLIdx] + + (xdist) * (ydist) * src_data[srcBRIdx] + + (1 - xdist) * (ydist) * src_data[srcBLIdx] + + (xdist) * (1 - ydist) * src_data[srcTRIdx]; + + if (chromatic_coeffs) { + // Gather data for chromatic transform + data_index[c] = index * channels + c; + rgb[c] = dest; + mean_in += rgb[c]; + + // Note: coeff[3] == color1, coeff[4] == color2, ... 
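                  // Coefficient layout (see copy_chromatic_coeffs_to_tensor): each batch item
                  // owns six floats, in order [gamma, brightness, contrast, color1, color2, color3].
                  // The per-channel color factor is applied inside this loop; gamma, brightness
                  // and contrast are applied once per pixel after the loop.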
+ rgb[c] *= chromatic_coeffs[n * 6 + (3 + c)]; + + mean_out += rgb[c]; + } else { + out_data[index * channels + c] = dest; + } + } + + float brightness_coeff = mean_in / (mean_out + 0.01f); + + if (chromatic_coeffs) { + // Chromatic transformation + for (int c = 0; c < channels; c++) { + // compensate brightness + rgb[c] = clamp(rgb[c] * brightness_coeff, 0.0f, 1.0f); + + // gamma change + rgb[c] = pow(rgb[c], gamma); + + // brightness change + rgb[c] = rgb[c] + brightness; + + // contrast change + rgb[c] = 0.5f + (rgb[c] - 0.5f) * contrast; + + out_data[data_index[c]] = clamp(rgb[c], 0.0f, 1.0f); + } + } + } + }); +} + +template<typename Device> +class DataAugmentation : public OpKernel { + public: + explicit DataAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) { + // Get the crop [height, width] tensor and verify its dimensions + OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_)); + OP_REQUIRES(ctx, crop_.size() == 2, + errors::InvalidArgument("crop must be 2 dimensions")); + + // TODO: Verify params are all the same length + + // Get the tensors for params_a and verify their dimensions + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_name", ¶ms_a_name_)); + OP_REQUIRES_OK(ctx, + ctx->GetAttr("params_a_rand_type", ¶ms_a_rand_type_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_exp", ¶ms_a_exp_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_mean", ¶ms_a_mean_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_spread", ¶ms_a_spread_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_prob", ¶ms_a_prob_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_coeff_schedule", ¶ms_a_coeff_schedule_)); + + // Get the tensors for params_b and verify their dimensions + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_name", ¶ms_b_name_)); + OP_REQUIRES_OK(ctx, + ctx->GetAttr("params_b_rand_type", ¶ms_b_rand_type_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_exp", ¶ms_b_exp_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_mean", ¶ms_b_mean_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_spread", ¶ms_b_spread_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_prob", ¶ms_b_prob_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_coeff_schedule", ¶ms_b_coeff_schedule_)); + } + + void Compute(OpKernelContext *ctx) override { + // Get the input images + const Tensor& input_a_t = ctx->input(0); + const Tensor& input_b_t = ctx->input(1); + + // Get the global step value + const Tensor& global_step_t = ctx->input(2); + auto global_step_eigen = global_step_t.tensor<int64, 0>(); + const int64 global_step = global_step_eigen.data()[0]; + + // Dimension constants + const int batch_size = input_a_t.dim_size(0); + const int src_height = input_a_t.dim_size(1); + const int src_width = input_a_t.dim_size(2); + const int channels = input_a_t.dim_size(3); + const int src_count = batch_size * src_height * src_width * channels; + const int out_height = crop_[0]; + const int out_width = crop_[1]; + const int out_count = batch_size * out_height * out_width * channels; + + // All tensors for this op + Tensor chromatic_coeffs_a_t; + Tensor chromatic_coeffs_b_t; + + // Allocate the memory for the output images + Tensor *output_a_t; + Tensor *output_b_t; + + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, TensorShape({ batch_size, crop_[0], crop_[1], + channels }), &output_a_t)); + OP_REQUIRES_OK(ctx, + ctx->allocate_output(1, TensorShape({ batch_size, crop_[0], crop_[1], + channels }), &output_b_t)); + + // Allocate the memory for the output spatial transforms + Tensor *spat_transform_a_t; + Tensor *spat_transform_b_t; + + 
OP_REQUIRES_OK(ctx, + ctx->allocate_output(2, TensorShape({ batch_size, 6 }), + &spat_transform_a_t)); + OP_REQUIRES_OK(ctx, + ctx->allocate_output(3, TensorShape({ batch_size, 6 }), + &spat_transform_b_t)); + + // Compute discount for coefficients if using a schedule + float discount_coeff_a = 1.0; + float discount_coeff_b = 1.0; + + if (params_a_coeff_schedule_.size() == 3) { + float half_life = params_a_coeff_schedule_[0]; + float initial_coeff = params_a_coeff_schedule_[1]; + float final_coeff = params_a_coeff_schedule_[2]; + discount_coeff_a = initial_coeff + (final_coeff - initial_coeff) * + (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0); + } + + if (params_b_coeff_schedule_.size() == 3) { + if (params_a_coeff_schedule_.size() == 3) { + discount_coeff_b = discount_coeff_a; + } else { + float half_life = params_b_coeff_schedule_[0]; + float initial_coeff = params_b_coeff_schedule_[1]; + float final_coeff = params_b_coeff_schedule_[2]; + discount_coeff_b = initial_coeff + (final_coeff - initial_coeff) * + (2.0 / (1.0 + exp(-1.0986 * global_step / half_life)) - 1.0); + } + } + + /*** BEGIN AUGMENTATION TO IMAGE A ***/ + auto input_a = input_a_t.tensor<float, 4>(); + auto output_a = output_a_t->tensor<float, 4>(); + + // Load augmentation parameters for image A + AugmentationParams aug_a = AugmentationParams(out_height, out_width, + params_a_name_, + params_a_rand_type_, + params_a_exp_, + params_a_mean_, + params_a_spread_, + params_a_prob_); + + std::vector<AugmentationCoeff> coeffs_a; + + + bool gen_spatial_transform = aug_a.should_do_spatial_transform(); + bool gen_chromatic_transform = aug_a.should_do_chromatic_transform(); + + for (int n = 0; n < batch_size; n++) { + AugmentationCoeff coeff; + + if (gen_spatial_transform) { + AugmentationLayerBase::generate_valid_spatial_coeffs(discount_coeff_a, aug_a, coeff, + src_width, src_height, + out_width, out_height); + } + + if (gen_chromatic_transform) { + AugmentationLayerBase::generate_chromatic_coeffs(discount_coeff_a, aug_a, coeff); + } + + coeffs_a.push_back(coeff); + } + + // Copy spatial coefficients A to the output Tensor on the CPU + // (output for FlowAugmentation) + auto spat_transform_a = spat_transform_a_t->tensor<float, 2>(); + AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a, + out_width, out_height, + src_width, src_height, + spat_transform_a); + + float *chromatic_coeffs_a_data = NULL; + + if (gen_chromatic_transform) { + // Allocate a temporary tensor to hold the chromatic coefficients + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum<float>::value, + TensorShape({ batch_size, 6 }), + &chromatic_coeffs_a_t)); + + // Copy the chromatic coefficients A to a temporary Tensor on the CPU + auto chromatic_coeffs_a = chromatic_coeffs_a_t.tensor<float, 2>(); + AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(coeffs_a, chromatic_coeffs_a); + chromatic_coeffs_a_data = chromatic_coeffs_a.data(); + } + + // Perform augmentation either on CPU or GPU + Augment<Device>( + ctx, + ctx->eigen_device<Device>(), + batch_size, + channels, + src_width, + src_height, + src_count, + out_width, + out_height, + input_a.data(), + output_a.data(), + spat_transform_a.data(), + chromatic_coeffs_a_data); + + /*** END AUGMENTATION TO IMAGE A ***/ + + /*** BEGIN GENERATE NEW COEFFICIENTS FOR IMAGE B ***/ + AugmentationParams aug_b = AugmentationParams(out_height, out_width, + params_b_name_, + params_b_rand_type_, + params_b_exp_, + params_b_mean_, + params_b_spread_, + params_b_prob_); + + 
std::vector<AugmentationCoeff> coeffs_b; + + bool gen_spatial_transform_b = aug_b.should_do_spatial_transform(); + bool gen_chromatic_transform_b = aug_b.should_do_chromatic_transform(); + + for (int n = 0; n < batch_size; n++) { + AugmentationCoeff coeff(coeffs_a[n]); + + // If we did a spatial transform on image A, we need to do the same one + // (+ possibly more) on image B + if (gen_spatial_transform_b) { + AugmentationLayerBase::generate_valid_spatial_coeffs(discount_coeff_b, aug_b, coeff, + src_width, src_height, + out_width, out_height); + } + + if (gen_chromatic_transform_b) { + AugmentationLayerBase::generate_chromatic_coeffs(discount_coeff_b, aug_b, coeff); + } + + coeffs_b.push_back(coeff); + } + + /*** END GENERATE NEW COEFFICIENTS FOR IMAGE B ***/ + + /*** BEGIN AUGMENTATION TO IMAGE B ***/ + auto input_b = input_b_t.tensor<float, 4>(); + auto output_b = output_b_t->tensor<float, 4>(); + + // Copy spatial coefficients B to the output Tensor on the CPU + auto spat_transform_b = spat_transform_b_t->tensor<float, 2>(); + AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b, + out_width, out_height, + src_width, src_height, + spat_transform_b); + + float *chromatic_coeffs_b_data = NULL; + + if (gen_chromatic_transform || gen_chromatic_transform_b) { + // Allocate a temporary tensor to hold the chromatic coefficients + tensorflow::AllocatorAttributes pinned_allocator; + pinned_allocator.set_on_host(true); + pinned_allocator.set_gpu_compatible(true); + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum<float>::value, + TensorShape({ batch_size, 6 }), + &chromatic_coeffs_b_t, pinned_allocator)); + + // Copy the chromatic coefficients A to a temporary Tensor on the CPU + auto chromatic_coeffs_b = chromatic_coeffs_b_t.tensor<float, 2>(); + AugmentationLayerBase::copy_chromatic_coeffs_to_tensor(coeffs_b, chromatic_coeffs_b); + chromatic_coeffs_b_data = chromatic_coeffs_b.data(); + } + + // Perform augmentation either on CPU or GPU + Augment<Device>( + ctx, + ctx->eigen_device<Device>(), + batch_size, + channels, + src_width, + src_height, + src_count, + out_width, + out_height, + input_b.data(), + output_b.data(), + spat_transform_b.data(), + chromatic_coeffs_b_data); + + // FlowAugmentation needs the inverse + // TODO: To avoid rewriting, can we invert when we read on the + // FlowAugmentation side? 
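      // At this point spat_transform_b holds the forward (output -> source) mapping that
      // Augment() used to warp image B; the call below overwrites it with the inverted
      // matrix, since FlowAugmentation needs the source -> augmented-B mapping to
      // re-express flow endpoints in the cropped frame.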
+ AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b, + out_width, out_height, + src_width, src_height, + spat_transform_b, + true); + + /*** END AUGMENTATION TO IMAGE B ***/ + } + + private: + std::vector<int32>crop_; + + // Params A + std::vector<string>params_a_name_; + std::vector<string>params_a_rand_type_; + std::vector<bool>params_a_exp_; + std::vector<float>params_a_mean_; + std::vector<float>params_a_spread_; + std::vector<float>params_a_prob_; + std::vector<float>params_a_coeff_schedule_; + + // Params B + std::vector<string>params_b_name_; + std::vector<string>params_b_rand_type_; + std::vector<bool>params_b_exp_; + std::vector<float>params_b_mean_; + std::vector<float>params_b_spread_; + std::vector<float>params_b_prob_; + std::vector<float>params_b_coeff_schedule_; +}; + + +REGISTER_KERNEL_BUILDER(Name("DataAugmentation") + .Device(DEVICE_CPU) + .HostMemory("global_step") + .HostMemory("transforms_from_a") + .HostMemory("transforms_from_b"), + DataAugmentation<CPUDevice>) + +#if GOOGLE_CUDA + +REGISTER_KERNEL_BUILDER(Name("DataAugmentation") + .Device(DEVICE_GPU) + .HostMemory("global_step") + .HostMemory("transforms_from_a") + .HostMemory("transforms_from_b"), + DataAugmentation<GPUDevice>) +#endif // GOOGLE_CUDA +} // namespace tensorflow diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc new file mode 100644 index 0000000..7a2101d --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.cu.cc @@ -0,0 +1,348 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "augmentation_base.h" +#include "data_augmentation.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +inline __device__ __host__ float clamp(float f, float a, float b) { + return fmaxf(a, fminf(f, b)); +} + +__global__ void SpatialAugmentation( + const int32 nthreads, + const int src_width, + const int src_height, + const int channels, + const int src_count, + const int out_width, + const int out_height, + const float *src_data, + float *out_data, + const float *transMats) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w) + // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k) + int c = index % channels; + int x = (index / channels) % out_width; + int y = (index / channels / out_width) % out_height; + int n = index / channels / out_width / out_height; + + const float *transMat = transMats + n * 6; + float xpos = x * transMat[0] + y * transMat[1] + transMat[2]; + float ypos = x * transMat[3] + y * transMat[4] + transMat[5]; + + xpos = clamp(xpos, 0.0f, (float)(src_width) - 1.05f); + ypos = clamp(ypos, 0.0f, (float)(src_height) - 1.05f); + + float tlx = floor(xpos); + float tly = floor(ypos); + + // Bilinear interpolation + int srcTLIdx = ((n * src_height + tly) * src_width + tlx) * channels + c; + int srcTRIdx = min((int)(((n * src_height + tly) * src_width + (tlx + 1)) * channels + c), + src_count); + int srcBLIdx = min((int)(((n * src_height + (tly + 1)) * src_width + tlx) * channels + c), + src_count); + int srcBRIdx = min((int)(((n 
* src_height + (tly + 1)) * src_width + (tlx + 1)) * channels + c), + src_count); + + float xdist = xpos - tlx; + float ydist = ypos - tly; + + float dest = (1 - xdist) * (1 - ydist) * src_data[srcTLIdx] + + (xdist) * (ydist) * src_data[srcBRIdx] + + (1 - xdist) * (ydist) * src_data[srcBLIdx] + + (xdist) * (1 - ydist) * src_data[srcTRIdx]; + + out_data[index] = dest; + } +} + +typedef Eigen::GpuDevice GPUDevice; + +template<> +void Augment(OpKernelContext *context, + const GPUDevice& d, + const int batch_size, + const int channels, + const int src_width, + const int src_height, + const int src_count, + const int out_width, + const int out_height, + const float *src_data, + float *out_data, + const float *transMats, + float *chromatic_coeffs) { + const int out_count = batch_size * out_height * out_width * channels; + CudaLaunchConfig config = GetCudaLaunchConfig(out_count, d); + + printf("Chromatic transform not yet implemented on GPU, ignoring."); + + SpatialAugmentation << < config.block_count, config.thread_per_block, 0, d.stream() >> > ( + config.virtual_thread_count, src_width, src_height, channels, src_count, + out_width, out_height, + src_data, out_data, transMats); +} + +// +// template<typename Device> +// class DataAugmentation : public OpKernel { +// public: +// explicit DataAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) { +// // Get the crop [height, width] tensor and verify its dimensions +// OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_)); +// OP_REQUIRES(ctx, crop_.size() == 2, +// errors::InvalidArgument("crop must be 2 dimensions")); +// +// // TODO: Verify params are all the same length +// +// // Get the tensors for params_a and verify their dimensions +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_name", ¶ms_a_name_)); +// OP_REQUIRES_OK(ctx, +// ctx->GetAttr("params_a_rand_type", +// ¶ms_a_rand_type_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_exp", ¶ms_a_exp_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_mean", ¶ms_a_mean_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_spread", +// ¶ms_a_spread_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_a_prob", ¶ms_a_prob_)); +// +// // Get the tensors for params_b and verify their dimensions +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_name", ¶ms_b_name_)); +// OP_REQUIRES_OK(ctx, +// ctx->GetAttr("params_b_rand_type", +// ¶ms_b_rand_type_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_exp", ¶ms_b_exp_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_mean", ¶ms_b_mean_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_spread", +// ¶ms_b_spread_)); +// OP_REQUIRES_OK(ctx, ctx->GetAttr("params_b_prob", ¶ms_b_prob_)); +// } +// +// void Compute(OpKernelContext *ctx) override { +// const GPUDevice& device = ctx->eigen_gpu_device(); +// +// // Get the input images +// const Tensor& input_a_t = ctx->input(0); +// const Tensor& input_b_t = ctx->input(1); +// +// // Dimension constants +// const int batch_size = input_a_t.dim_size(0); +// const int src_height = input_a_t.dim_size(1); +// const int src_width = input_a_t.dim_size(2); +// const int channels = input_a_t.dim_size(3); +// const int src_count = batch_size * src_height * src_width * channels; +// const int out_height = crop_[0]; +// const int out_width = crop_[1]; +// const int out_count = batch_size * out_height * out_width * channels; +// +// // Allocate the memory for the output images +// Tensor *output_a_t; +// Tensor *output_b_t; +// +// OP_REQUIRES_OK(ctx, +// ctx->allocate_output(0, TensorShape({ batch_size, +// crop_[0], crop_[1], +// 
channels }), +// &output_a_t)); +// OP_REQUIRES_OK(ctx, +// ctx->allocate_output(1, TensorShape({ batch_size, +// crop_[0], crop_[1], +// channels }), +// &output_b_t)); +// +// // Allocate the memory for the output spatial transforms +// Tensor *spat_transform_a_t; +// Tensor *spat_transform_b_t; +// +// OP_REQUIRES_OK(ctx, +// ctx->allocate_output(2, TensorShape({ batch_size, 6 }), +// &spat_transform_a_t)); +// OP_REQUIRES_OK(ctx, +// ctx->allocate_output(3, TensorShape({ batch_size, 6 }), +// &spat_transform_b_t)); +// +// // Allocate temporary pinned memory for the spatial transforms to be +// used +// // on the GPU +// tensorflow::AllocatorAttributes pinned_allocator; +// pinned_allocator.set_on_host(true); +// pinned_allocator.set_gpu_compatible(true); +// +// Tensor spat_transform_a_pinned_t; +// Tensor spat_transform_b_pinned_t; +// OP_REQUIRES_OK(ctx, +// ctx->allocate_temp(DataTypeToEnum<float>::value, +// TensorShape({ batch_size, 6 }), +// &spat_transform_a_pinned_t, +// pinned_allocator)); +// OP_REQUIRES_OK(ctx, +// ctx->allocate_temp(DataTypeToEnum<float>::value, +// TensorShape({ batch_size, 6 }), +// &spat_transform_b_pinned_t, +// pinned_allocator)); +// auto spat_transform_a_pinned = spat_transform_a_pinned_t.tensor<float, +// 2>(); +// auto spat_transform_b_pinned = spat_transform_b_pinned_t.tensor<float, +// 2>(); +// +// /*** BEGIN AUGMENTATION TO IMAGE A ***/ +// auto input_a = input_a_t.tensor<float, 4>(); +// auto output_a = output_a_t->tensor<float, 4>(); +// +// // Load augmentation parameters for image A +// AugmentationParams aug_a = AugmentationParams(out_height, out_width, +// params_a_name_, +// params_a_rand_type_, +// params_a_exp_, +// params_a_mean_, +// params_a_spread_, +// params_a_prob_); +// +// std::vector<AugmentationCoeff> coeffs_a; +// +// bool gen_spatial_transform = aug_a.should_do_spatial_transform(); +// +// for (int n = 0; n < batch_size; n++) { +// AugmentationCoeff coeff; +// +// if (gen_spatial_transform) { +// AugmentationLayerBase::generate_valid_spatial_coeffs(aug_a, coeff, +// src_width, +// src_height, +// out_width, +// out_height); +// } +// +// coeffs_a.push_back(coeff); +// } +// +// // Copy spatial coefficients A to the output Tensor on the CPU (output +// for +// // FlowAugmentation) +// auto spat_transform_a = spat_transform_a_t->tensor<float, 2>(); +// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a, +// out_width, +// out_height, +// src_width, +// src_height, +// spat_transform_a); +// +// // ...as well as a Tensor going to the GPU +// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_a, +// out_width, +// out_height, +// src_width, +// src_height, +// +// +// +// spat_transform_a_pinned); +// +// CudaLaunchConfig config = GetCudaLaunchConfig(out_count, device); +// SpatialAugmentation << < config.block_count, config.thread_per_block, +// 0, +// device.stream() >> > ( +// config.virtual_thread_count, src_width, src_height, channels, +// src_count, +// out_width, out_height, +// input_a.data(), output_a.data(), spat_transform_a_pinned.data()); +// +// /*** END AUGMENTATION TO IMAGE A ***/ +// +// /*** BEGIN GENERATE NEW COEFFICIENTS FOR IMAGE B ***/ +// AugmentationParams aug_b = AugmentationParams(out_height, out_width, +// params_b_name_, +// params_b_rand_type_, +// params_b_exp_, +// params_b_mean_, +// params_b_spread_, +// params_b_prob_); +// +// std::vector<AugmentationCoeff> coeffs_b; +// +// gen_spatial_transform = aug_b.should_do_spatial_transform(); +// +// for (int n = 0; n < 
batch_size; n++) { +// AugmentationCoeff coeff; +// +// if (gen_spatial_transform) { +// AugmentationLayerBase::generate_valid_spatial_coeffs(aug_b, coeff, +// src_width, +// src_height, +// out_width, +// out_height); +// } +// +// coeffs_b.push_back(coeff); +// } +// +// /*** END GENERATE NEW COEFFICIENTS FOR IMAGE B ***/ +// +// /*** BEGIN AUGMENTATION TO IMAGE B ***/ +// auto input_b = input_b_t.tensor<float, 4>(); +// auto output_b = output_b_t->tensor<float, 4>(); +// +// // Copy spatial coefficients B to the output Tensor on the CPU +// auto spat_transform_b = spat_transform_b_t->tensor<float, 2>(); +// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b, +// out_width, +// out_height, +// src_width, +// src_height, +// spat_transform_b, +// true); +// AugmentationLayerBase::copy_spatial_coeffs_to_tensor(coeffs_b, +// out_width, +// out_height, +// src_width, +// src_height, +// +// +// +// spat_transform_b_pinned); +// +// SpatialAugmentation << < config.block_count, config.thread_per_block, +// 0, +// device.stream() >> > ( +// config.virtual_thread_count, src_width, src_height, channels, +// src_count, +// out_width, out_height, +// input_b.data(), output_b.data(), spat_transform_b_pinned.data()); +// +// /*** END AUGMENTATION TO IMAGE B ***/ +// } +// +// private: +// std::vector<int32>crop_; +// +// // Params A +// std::vector<string>params_a_name_; +// std::vector<string>params_a_rand_type_; +// std::vector<bool>params_a_exp_; +// std::vector<float>params_a_mean_; +// std::vector<float>params_a_spread_; +// std::vector<float>params_a_prob_; +// +// // Params B +// std::vector<string>params_b_name_; +// std::vector<string>params_b_rand_type_; +// std::vector<bool>params_b_exp_; +// std::vector<float>params_b_mean_; +// std::vector<float>params_b_spread_; +// std::vector<float>params_b_prob_; +// }; +} // namespace tensorflow +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h new file mode 100644 index 0000000..545b8a0 --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/data_augmentation.h @@ -0,0 +1,22 @@ +#ifndef FLOWNET_DATA_AUGMENTATION_H_ +#define FLOWNET_DATA_AUGMENTATION_H_ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +template<class Device> +void Augment(OpKernelContext *context, + const Device & d, + const int batch_size, + const int channels, + const int src_width, + const int src_height, + const int src_count, + const int out_width, + const int out_height, + const float *src_data, + float *out_data, + const float *transMats, + float *chromatic_coeffs); +} // namespace tensorflow +#endif // FLOWNET_DATA_AUGMENTATION_H_ diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc new file mode 100644 index 0000000..b5cc11f --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.cc @@ -0,0 +1,129 @@ +#define EIGEN_USE_THREADS + +#include "flow_augmentation.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +inline int clamp(int f, int a, int b) { + return std::max(a, std::min(f, b)); +} + +template<> +void FillFlowAugmentation(const 
CPUDevice& device, + typename TTypes<float, 4>::Tensor output, + typename TTypes<float, 4>::ConstTensor flows, + typename TTypes<float, 2>::ConstTensor transforms_from_a, + typename TTypes<float, 2>::ConstTensor transforms_from_b) { + const int batch_size = output.dimension(0); + const int out_height = output.dimension(1); + const int out_width = output.dimension(2); + const int src_height = flows.dimension(1); + const int src_width = flows.dimension(2); + const int src_total_count = flows.dimension(0) * flows.dimension(1) * + flows.dimension(2) * flows.dimension(3); + float *output_ptr = output.data(); + const float *flow_ptr = flows.data(); + + for (int n = 0; n < batch_size; n++) { + const float *transMatA = transforms_from_a.data() + n * 6; + const float *transMatB = transforms_from_b.data() + n * 6; + + for (int y = 0; y < out_height; y++) { + int outputIdxOffset = (n * out_height + y) * out_width; + + for (int x = 0; x < out_width; x++) { + // Apply transformation matrix applied to first image + const float xpos1 = x * transMatA[0] + y * transMatA[1] + transMatA[2]; + const float ypos1 = x * transMatA[3] + y * transMatA[4] + transMatA[5]; + + const int srcXIdx = + ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) * 2 + 0; + const int srcYIdx = srcXIdx + 1; + + const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)]; + const float ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)]; + + // Apply inverse of the transformation matrix applied to second image + const float xpos3 = xpos2 * transMatB[0] + ypos2 * transMatB[1] + transMatB[2]; + const float ypos3 = xpos2 * transMatB[3] + ypos2 * transMatB[4] + transMatB[5]; + + output_ptr[(outputIdxOffset + x) * 2 + 0] = xpos3 - (float)x; + output_ptr[(outputIdxOffset + x) * 2 + 1] = ypos3 - (float)y; + } + } + } +} + +template<typename Device> +class FlowAugmentation : public OpKernel { + public: + explicit FlowAugmentation(OpKernelConstruction *ctx) : OpKernel(ctx) { + // Get the crop [height, width] tensor and verify its dimensions + OP_REQUIRES_OK(ctx, ctx->GetAttr("crop", &crop_)); + OP_REQUIRES(ctx, crop_.size() == 2, + errors::InvalidArgument("crop must be 2 dimensions")); + } + + void Compute(OpKernelContext *ctx) override { + // Get the input images and transforms and verify their dimensions + const Tensor& flows_t = ctx->input(0); + const Tensor& transforms_from_a_t = ctx->input(1); + const Tensor& transforms_from_b_t = ctx->input(2); + + OP_REQUIRES(ctx, flows_t.dims() == 4, + errors::InvalidArgument("Input images must have rank 4")); + OP_REQUIRES(ctx, + (TensorShapeUtils::IsMatrix(transforms_from_a_t.shape()) && + transforms_from_a_t.dim_size(0) == + flows_t.dim_size(0) && + transforms_from_a_t.dim_size(1) == 6), + errors::InvalidArgument( + "Input transforms_from_a should be num_images x 6")); + OP_REQUIRES(ctx, + (TensorShapeUtils::IsMatrix(transforms_from_b_t.shape()) && + transforms_from_b_t.dim_size(0) == + flows_t.dim_size(0) && + transforms_from_b_t.dim_size(1) == 6), + errors::InvalidArgument( + "Input transforms_from_b should be num_images x 6")); + + // Allocate the memory for the output + Tensor *output_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output( + 0, + TensorShape({ flows_t.dim_size(0), crop_[0], crop_[1], + flows_t.dim_size(3) }), &output_t)); + + // Perform flow augmentation + auto flows = flows_t.tensor<float, 4>(); + auto transforms_from_a = transforms_from_a_t.tensor<float, 2>(); + auto transforms_from_b = transforms_from_b_t.tensor<float, 2>(); + auto 
output = output_t->tensor<float, 4>(); + + FillFlowAugmentation(ctx->eigen_device<Device>(), + output, + flows, + transforms_from_a, + transforms_from_b); + } + + private: + std::vector<int32>crop_; +}; + +REGISTER_KERNEL_BUILDER(Name("FlowAugmentation") + .Device(DEVICE_CPU), + FlowAugmentation<CPUDevice>) + +#if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER(Name("FlowAugmentation") + .Device(DEVICE_GPU), + FlowAugmentation<GPUDevice>) +#endif // GOOGLE_CUDA +} // end namespace tensorflow diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h new file mode 100644 index 0000000..7795991 --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation.h @@ -0,0 +1,19 @@ +#ifndef FLOWNET_FLOW_AUG_H_ +#define FLOWNET_FLOW_AUG_H_ + +// See docs in ../ops/image_ops.cc. + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +template<class Device> +void FillFlowAugmentation(const Device& device, + typename TTypes<float, 4>::Tensor output, + typename TTypes<float, 4>::ConstTensor flows, + typename TTypes<float, 2>::ConstTensor transforms_from_a, + typename TTypes<float, 2>::ConstTensor transforms_from_b); +} // end namespace tensorflow + +#endif // FLOWNET_FLOW_AUG_H_ diff --git a/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc new file mode 100644 index 0000000..7e10864 --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/kernels/flow_augmentation_gpu.cu.cc @@ -0,0 +1,95 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include <stdio.h> +#include <iostream> + +#include "flow_augmentation.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { +typedef Eigen::GpuDevice GPUDevice; + +inline __device__ __host__ int clamp(int f, int a, int b) { + return max(a, min(f, b)); +} + +__global__ void FillFlowAugmentationKernel( + const int32 nthreads, + const float *flow_ptr, + const float *transforms_from_a, + const float *inv_transforms_from_b, + const int src_total_count, const int src_height, const int src_width, + const int batch_size, const int out_height, + const int out_width, float *output_ptr) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + const float x = (float)(index % out_width); + const float y = (float)((index / out_width) % out_height); + const int n = (index / out_width / out_height); + + const int transformIdx = n * 6; + + // Apply transformation matrix applied to second image + const float xpos1 = x * transforms_from_a[transformIdx + 0] + + y * transforms_from_a[transformIdx + 1] + + transforms_from_a[transformIdx + 2]; + const float ypos1 = x * transforms_from_a[transformIdx + 3] + + y * transforms_from_a[transformIdx + 4] + + transforms_from_a[transformIdx + 5]; + + // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w) + // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k) + const int srcXIdx = + ((n * src_height + (int)(ypos1 + 0.5)) * src_width + (int)(xpos1 + 0.5)) * + 2 + 0; + const int srcYIdx = srcXIdx + 1; + + const float xpos2 = xpos1 + flow_ptr[clamp(srcXIdx, 0, src_total_count - 1)]; + const float 
ypos2 = ypos1 + flow_ptr[clamp(srcYIdx, 0, src_total_count - 1)]; + + // Apply inverse of the transformation matrix applied to first image + const float xpos3 = xpos2 * inv_transforms_from_b[transformIdx + 0] + + ypos2 * inv_transforms_from_b[transformIdx + 1] + + inv_transforms_from_b[transformIdx + 2]; + const float ypos3 = xpos2 * inv_transforms_from_b[transformIdx + 3] + + ypos2 * inv_transforms_from_b[transformIdx + 4] + + inv_transforms_from_b[transformIdx + 5]; + + output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 0] = xpos3 - + x; + output_ptr[((n * out_height + (int)y) * out_width + (int)x) * 2 + 1] = ypos3 - + y; + } +} + +template<> +void FillFlowAugmentation(const GPUDevice& device, + typename TTypes<float, 4>::Tensor output, + typename TTypes<float, 4>::ConstTensor flows, + typename TTypes<const float, 2>::ConstTensor transforms_from_a, + typename TTypes<const float, 2>::ConstTensor transforms_from_b) { + const int batch_size = output.dimension(0); + const int out_height = output.dimension(1); + const int out_width = output.dimension(2); + const int depth = 2; + const int total_count = batch_size * out_height * out_width * depth; + const int src_total_count = flows.dimension(0) * flows.dimension(1) * + flows.dimension(2) * flows.dimension(3); + + CudaLaunchConfig config = GetCudaLaunchConfig(total_count / 2, device); + + FillFlowAugmentationKernel << < config.block_count, config.thread_per_block, 0, + device.stream() >> > ( + total_count / 2, flows.data(), transforms_from_a.data(), + transforms_from_b.data(), + src_total_count, flows.dimension(1), flows.dimension(2), batch_size, + out_height, out_width, output.data()); +} +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/Codes/flownet2/src/ops/preprocessing/preprocessing.cc b/Codes/flownet2/src/ops/preprocessing/preprocessing.cc new file mode 100644 index 0000000..086a0d0 --- /dev/null +++ b/Codes/flownet2/src/ops/preprocessing/preprocessing.cc @@ -0,0 +1,96 @@ +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; +using shape_inference::DimensionHandle; + +Status SetOutputToSizedImage(InferenceContext *c) { + ShapeHandle input; + + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); + DimensionHandle batch = c->Dim(input, 0); + DimensionHandle depth = c->Dim(input, 3); + std::vector<int32> crop_; + c->GetAttr("crop", &crop_); + DimensionHandle height = c->MakeDim(crop_[0]); + DimensionHandle width = c->MakeDim(crop_[1]); + c->set_output(0, c->MakeShape({ batch, height, width, depth })); + return Status::OK(); +} + +REGISTER_OP("DataAugmentation") +.Input("image_a: float32") +.Input("image_b: float32") +.Input("global_step: int64") +.Attr("crop: list(int) >= 2") +.Attr("params_a_name: list(string)") +.Attr("params_a_rand_type: list(string)") +.Attr("params_a_exp: list(bool)") +.Attr("params_a_mean: list(float)") +.Attr("params_a_spread: list(float)") +.Attr("params_a_prob: list(float)") +.Attr("params_a_coeff_schedule: list(float)") +.Attr("params_b_name: list(string)") +.Attr("params_b_rand_type: list(string)") +.Attr("params_b_exp: list(bool)") +.Attr("params_b_mean: list(float)") +.Attr("params_b_spread: list(float)") +.Attr("params_b_prob: list(float)") +.Attr("params_b_coeff_schedule: list(float)") +.Output("aug_image_a: float32") +.Output("aug_image_b: float32") 
+.Output("transforms_from_a: float32") +.Output("transforms_from_b: float32") +.SetShapeFn([](InferenceContext *c) { + // Verify input A and input B both have 4 dimensions + ShapeHandle input_shape_a, input_shape_b; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape_a)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &input_shape_b)); + + // TODO: Verify params vectors all have the same length + + // TODO: Move this out of here and into Compute + // Verify input A and input B are the same shape + DimensionHandle batch_size, unused; + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 0), + c->Value(c->Dim(input_shape_b, 0)), + &batch_size)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 1), + c->Value(c->Dim(input_shape_b, 1)), &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 2), + c->Value(c->Dim(input_shape_b, 2)), &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(input_shape_a, 3), + c->Value(c->Dim(input_shape_b, 3)), &unused)); + + // Get cropping dimensions + std::vector<int32>crop_; + TF_RETURN_IF_ERROR(c->GetAttr("crop", &crop_)); + + // Reshape input shape to cropped shape + TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 1, c->MakeDim(crop_[0]), + &input_shape_a)); + TF_RETURN_IF_ERROR(c->ReplaceDim(input_shape_a, 2, c->MakeDim(crop_[1]), + &input_shape_a)); + + // Set output images shapes + c->set_output(0, input_shape_a); + c->set_output(1, input_shape_a); + + // Set output spatial transforms shapes + c->set_output(2, c->MakeShape({ batch_size, 6 })); + c->set_output(3, c->MakeShape({ batch_size, 6 })); + + return Status::OK(); + }); + +REGISTER_OP("FlowAugmentation") +.Input("flows: float32") +.Input("transforms_from_a: float32") +.Input("transforms_from_b: float32") +.Attr("crop: list(int) >= 2") +.Output("transformed_flows: float32") +.SetShapeFn(SetOutputToSizedImage); +} // namespace tensorflow diff --git a/Codes/flownet2/src/training_schedules.py b/Codes/flownet2/src/training_schedules.py new file mode 100644 index 0000000..4db5aab --- /dev/null +++ b/Codes/flownet2/src/training_schedules.py @@ -0,0 +1,12 @@ +LONG_SCHEDULE = { + 'step_values': [400000, 600000, 800000, 1000000], + 'learning_rates': [0.0001, 0.00005, 0.000025, 0.0000125, 0.00000625], + 'momentum': 0.9, + 'momentum2': 0.999, + 'weight_decay': 0.0004, + 'max_iter': 1200000, +} + +FINETUNE_SCHEDULE = { + # TODO: Finetune schedule +} diff --git a/Codes/flownet2/src/utils.py b/Codes/flownet2/src/utils.py new file mode 100644 index 0000000..f6abe18 --- /dev/null +++ b/Codes/flownet2/src/utils.py @@ -0,0 +1,46 @@ +import tensorflow as tf + + +# Thanks, https://github.com/tensorflow/tensorflow/issues/4079 +def LeakyReLU(x, leak=0.1, name="lrelu"): + with tf.variable_scope(name): + f1 = 0.5 * (1.0 + leak) + f2 = 0.5 * (1.0 - leak) + return f1 * x + f2 * abs(x) + + +def average_endpoint_error(labels, predictions): + """ + Given labels and predictions of size (N, H, W, 2), calculates average endpoint error: + sqrt[sum_across_channels{(X - Y)^2}] + """ + num_samples = predictions.shape.as_list()[0] + with tf.name_scope(None, "average_endpoint_error", (predictions, labels)) as scope: + predictions = tf.to_float(predictions) + labels = tf.to_float(labels) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + + squared_difference = tf.square(tf.subtract(predictions, labels)) + # sum across channels: sum[(X - Y)^2] -> N, H, W, 1 + loss = tf.reduce_sum(squared_difference, 3, keep_dims=True) + loss = tf.sqrt(loss) + return tf.reduce_sum(loss) / num_samples + + 
+def pad(tensor, num=1): + """ + Pads the given tensor along the height and width dimensions with `num` 0s on each side + """ + return tf.pad(tensor, [[0, 0], [num, num], [num, num], [0, 0]], "CONSTANT") + + +def antipad(tensor, num=1): + """ + Performs a crop. "padding" for a deconvolutional layer (conv2d tranpose) removes + padding from the output rather than adding it to the input. + """ + batch, h, w, c = tensor.get_shape().as_list() + # print(batch, h, w, c) + # print(type(batch), type(h), type(w), type(c)) + # return tf.slice(tensor, begin=[0, num, num, 0], size=[batch, h - 2 * num, w - 2 * num, c]) + return tensor[:, num: num + h - 2 * num, num: num + w - 2 * num, :] diff --git a/Codes/flownet2/test.py b/Codes/flownet2/test.py new file mode 100644 index 0000000..2fcb380 --- /dev/null +++ b/Codes/flownet2/test.py @@ -0,0 +1,163 @@ +import os +import tensorflow as tf +import numpy as np +from scipy.misc import imread +import matplotlib +from src.flowlib import read_flow, flow_to_image +matplotlib.use('TKAgg') +import matplotlib.pyplot as plt + +_preprocessing_ops = tf.load_op_library( + tf.resource_loader.get_path_to_datafile("./src/ops/build/preprocessing.so")) + + +def display(img, c): + plt.subplot(int('22' + str(c + 1))) + plt.imshow(img[0, :, :, :]) + + +def main(): + """ +.Input("image_a: float32") +.Input("image_b: float32") +.Attr("crop: list(int) >= 2") +.Attr("params_a_name: list(string)") +.Attr("params_a_rand_type: list(string)") +.Attr("params_a_exp: list(bool)") +.Attr("params_a_mean: list(float32)") +.Attr("params_a_spread: list(float32)") +.Attr("params_a_prob: list(float32)") +.Attr("params_b_name: list(string)") +.Attr("params_b_rand_type: list(string)") +.Attr("params_b_exp: list(bool)") +.Attr("params_b_mean: list(float32)") +.Attr("params_b_spread: list(float32)") +.Attr("params_b_prob: list(float32)") +.Output("aug_image_a: float32") +.Output("aug_image_b: float32") +.Output("spatial_transform_a: float32") +.Output("inv_spatial_transform_b: float32") + """ + + crop = [364, 492] + params_a_name = ['translate_x', 'translate_y'] + params_a_rand_type = ['uniform_bernoulli', 'uniform_bernoulli'] + params_a_exp = [False, False] + params_a_mean = [0.0, 0.0] + params_a_spread = [0.4, 0.4] + params_a_prob = [1.0, 1.0] + params_b_name = [] + params_b_rand_type = [] + params_b_exp = [] + params_b_mean = [] + params_b_spread = [] + params_b_prob = [] + + with tf.Session() as sess: + with tf.device('/gpu:0'): + image_a = imread('./img0.ppm') / 255.0 + image_b = imread('./img1.ppm') / 255.0 + flow = read_flow('./flow.flo') + + image_a_tf = tf.expand_dims(tf.to_float(tf.constant(image_a, dtype=tf.float64)), 0) + image_b_tf = tf.expand_dims(tf.to_float(tf.constant(image_b, dtype=tf.float64)), 0) + + preprocess = _preprocessing_ops.data_augmentation(image_a_tf, + image_b_tf, + crop, + params_a_name, + params_a_rand_type, + params_a_exp, + params_a_mean, + params_a_spread, + params_a_prob, + params_b_name, + params_b_rand_type, + params_b_exp, + params_b_mean, + params_b_spread, + params_b_prob) + + out = sess.run(preprocess) + trans = out.spatial_transform_a + inv_trans = out.inv_spatial_transform_b + + print(trans.shape) + print(inv_trans.shape) + + flow_tf = tf.expand_dims(tf.to_float(tf.constant(flow)), 0) + aug_flow_tf = _preprocessing_ops.flow_augmentation(flow_tf, trans, inv_trans, crop) + + aug_flow = sess.run(aug_flow_tf)[0, :, :, :] + + # Plot img0, img0aug + plt.subplot(321) + plt.imshow(image_a) + plt.subplot(322) + plt.imshow(out.aug_image_a[0, :, :, :]) + + # Plot 
img1, img1aug + plt.subplot(323) + plt.imshow(image_b) + plt.subplot(324) + plt.imshow(out.aug_image_b[0, :, :, :]) + + # Plot flow, flowaug + plt.subplot(325) + plt.imshow(flow_to_image(flow)) + plt.subplot(326) + plt.imshow(flow_to_image(aug_flow)) + + plt.show() + + # image_b_aug = sess.run(image_b_tf) + # + # display(np.expand_dims(image_a, 0), 0) + # display(np.expand_dims(image_b, 0), 1) + # display(image_a_aug, 2) + # display(image_b_aug, 3) + # plt.show() + + # o = _preprocessing_ops.flow_augmentation(flow, trans, inv_t, [4, 8]) + # print n[:, :, :] + # print n[0, 0, 1], n[0, 0, 0] + # print n[1, 0, 1], n[1, 0, 0] + # print n[2, 0, 1], n[2, 0, 0] + # print '---' + # print sess.run(o) + + """# Goes along width first!! + // Caffe, NKHW: ((n * K + k) * H + h) * W + w at point (n, k, h, w) + // TF, NHWK: ((n * H + h) * W + w) * K + k at point (n, h, w, k) + + H=5, W=10, K=2 + n=0, h=1, w=5, k=0 + + (2 * 10) + c + + 30 49 n[0, 1, 5, 0]""" + + +print(os.getpid()) +input("Press Enter to continue...") +main() + +# Last index is channel!! + +# K + +# value 13 should be at [0, 2, 7, 1] aka batch=0, height=1, width=0, channel=0. it is at index=20. +# +# items = { +# 'N': [0, 0], +# 'H': [5, 2], +# 'W': [10, 7], +# 'K': [2, 1], +# } +# +# for (i1, v1) in items.iteritems(): +# for (i2, v2) in items.iteritems(): +# for (i3, v3) in items.iteritems(): +# for (i4, v4) in items.iteritems(): +# if ((v1[1] * v2[0] + v2[1]) * v3[0] + v3[1]) * v4[0] + v4[1] == 55: +# print 'found it: ', i1, i2, i3, i4 diff --git a/Codes/inference.py b/Codes/inference.py new file mode 100644 index 0000000..0263339 --- /dev/null +++ b/Codes/inference.py @@ -0,0 +1,149 @@ +import tensorflow as tf +import os +import time +import numpy as np +import pickle + + +from models import generator +from utils import DataLoader, load, save, psnr_error +from constant import const +import evaluate + + +slim = tf.contrib.slim + +os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID" +os.environ['CUDA_VISIBLE_DEVICES'] = const.GPU + +dataset_name = const.DATASET +test_folder = const.TEST_FOLDER + +num_his = const.NUM_HIS +height, width = 256, 256 + +snapshot_dir = const.SNAPSHOT_DIR +psnr_dir = const.PSNR_DIR +evaluate_name = const.EVALUATE + +print(const) + + +# define dataset +with tf.name_scope('dataset'): + test_video_clips_tensor = tf.placeholder(shape=[1, height, width, 3 * (num_his + 1)], + dtype=tf.float32) + test_inputs = test_video_clips_tensor[..., 0:num_his*3] + test_gt = test_video_clips_tensor[..., -3:] + print('test inputs = {}'.format(test_inputs)) + print('test prediction gt = {}'.format(test_gt)) + +# define testing generator function and +# in testing, only generator networks, there is no discriminator networks and flownet. 
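# To make the slicing above concrete: the placeholder stacks num_his + 1 RGB frames along
# the channel axis, so test_inputs takes the first num_his * 3 channels (the history) and
# test_gt the last 3 channels (the frame to be predicted). Only the generator is built at
# test time; the PSNR between its prediction and test_gt is the per-frame score that
# evaluate.py later turns into an AUC.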
+with tf.variable_scope('generator', reuse=None): + print('testing = {}'.format(tf.get_variable_scope().name)) + test_outputs = generator(test_inputs, layers=4, output_channel=3) + test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt) + + +config = tf.ConfigProto() +config.gpu_options.allow_growth = True +with tf.Session(config=config) as sess: + # dataset + data_loader = DataLoader(test_folder, height, width) + + # initialize weights + sess.run(tf.global_variables_initializer()) + print('Init global successfully!') + + # tf saver + saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None) + + restore_var = [v for v in tf.global_variables()] + loader = tf.train.Saver(var_list=restore_var) + + def inference_func(ckpt, dataset_name, evaluate_name): + load(loader, sess, ckpt) + + psnr_records = [] + videos_info = data_loader.videos + num_videos = len(videos_info.keys()) + total = 0 + timestamp = time.time() + + for video_name, video in videos_info.items(): + length = video['length'] + total += length + psnrs = np.empty(shape=(length,), dtype=np.float32) + + for i in range(num_his, length): + video_clip = data_loader.get_video_clips(video_name, i - num_his, i + 1) + psnr = sess.run(test_psnr_error, + feed_dict={test_video_clips_tensor: video_clip[np.newaxis, ...]}) + psnrs[i] = psnr + + print('video = {} / {}, i = {} / {}, psnr = {:.6f}'.format( + video_name, num_videos, i, length, psnr)) + + psnrs[0:num_his] = psnrs[num_his] + psnr_records.append(psnrs) + + result_dict = {'dataset': dataset_name, 'psnr': psnr_records, 'flow': [], 'names': [], 'diff_mask': []} + + used_time = time.time() - timestamp + print('total time = {}, fps = {}'.format(used_time, total / used_time)) + + # TODO specify what's the actual name of ckpt. + pickle_path = os.path.join(psnr_dir, os.path.split(ckpt)[-1]) + with open(pickle_path, 'wb') as writer: + pickle.dump(result_dict, writer, pickle.HIGHEST_PROTOCOL) + + results = evaluate.evaluate(evaluate_name, pickle_path) + print(results) + + + if os.path.isdir(snapshot_dir): + def check_ckpt_valid(ckpt_name): + is_valid = False + ckpt = '' + if ckpt_name.startswith('model.ckpt-'): + ckpt_name_splits = ckpt_name.split('.') + ckpt = str(ckpt_name_splits[0]) + '.' 
+ str(ckpt_name_splits[1]) + ckpt_path = os.path.join(snapshot_dir, ckpt) + if os.path.exists(ckpt_path + '.index') and os.path.exists(ckpt_path + '.meta') and \ + os.path.exists(ckpt_path + '.data-00000-of-00001'): + is_valid = True + + return is_valid, ckpt + + def scan_psnr_folder(): + tested_ckpt_in_psnr_sets = set() + for test_psnr in os.listdir(psnr_dir): + tested_ckpt_in_psnr_sets.add(test_psnr) + return tested_ckpt_in_psnr_sets + + def scan_model_folder(): + saved_models = set() + for ckpt_name in os.listdir(snapshot_dir): + is_valid, ckpt = check_ckpt_valid(ckpt_name) + if is_valid: + saved_models.add(ckpt) + return saved_models + + tested_ckpt_sets = scan_psnr_folder() + while True: + all_model_ckpts = scan_model_folder() + new_model_ckpts = all_model_ckpts - tested_ckpt_sets + + for ckpt_name in new_model_ckpts: + # inference + ckpt = os.path.join(snapshot_dir, ckpt_name) + inference_func(ckpt, dataset_name, evaluate_name) + + tested_ckpt_sets.add(ckpt_name) + + print('waiting for models...') + evaluate.evaluate('compute_auc', psnr_dir) + time.sleep(60) + else: + inference_func(snapshot_dir, dataset_name, evaluate_name) diff --git a/Codes/loss_functions.py b/Codes/loss_functions.py new file mode 100644 index 0000000..ca97966 --- /dev/null +++ b/Codes/loss_functions.py @@ -0,0 +1,54 @@ +import tensorflow as tf +import numpy as np + + +def flow_loss(gen_flows, gt_flows): + print(gen_flows['flow']) + return tf.reduce_mean(tf.abs(gen_flows['flow'] - gt_flows['flow'])) + + +def intensity_loss(gen_frames, gt_frames, l_num): + """ + Calculates the sum of lp losses between the predicted and ground truth frames. + + @param gen_frames: The predicted frames at each scale. + @param gt_frames: The ground truth frames at each scale + @param l_num: 1 or 2 for l1 and l2 loss, respectively). + + @return: The lp loss. + """ + return tf.reduce_mean(tf.abs((gen_frames - gt_frames) ** l_num)) + + +def gradient_loss(gen_frames, gt_frames, alpha): + """ + Calculates the sum of GDL losses between the predicted and ground truth frames. + + @param gen_frames: The predicted frames at each scale. + @param gt_frames: The ground truth frames at each scale + @param alpha: The power to which each gradient term is raised. + + @return: The GDL loss. + """ + # calculate the loss for each scale + # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively. 
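    # In symbols: with horizontal and vertical finite differences d_x I and d_y I (computed
    # by the conv2d calls below using one identity filter per channel), the value returned is
    #     mean( | |d_x gt| - |d_x gen| | ** alpha + | |d_y gt| - |d_y gen| | ** alpha )
    # i.e. the gradient difference loss averaged over all positions and channels.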
+ + channels = gen_frames.get_shape().as_list()[-1] + pos = tf.constant(np.identity(channels), dtype=tf.float32) # 3 x 3 + neg = -1 * pos + filter_x = tf.expand_dims(tf.stack([neg, pos]), 0) # [-1, 1] + filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)]) # [[1],[-1]] + strides = [1, 1, 1, 1] # stride of (1, 1) + padding = 'SAME' + + gen_dx = tf.abs(tf.nn.conv2d(gen_frames, filter_x, strides, padding=padding)) + gen_dy = tf.abs(tf.nn.conv2d(gen_frames, filter_y, strides, padding=padding)) + gt_dx = tf.abs(tf.nn.conv2d(gt_frames, filter_x, strides, padding=padding)) + gt_dy = tf.abs(tf.nn.conv2d(gt_frames, filter_y, strides, padding=padding)) + + grad_diff_x = tf.abs(gt_dx - gen_dx) + grad_diff_y = tf.abs(gt_dy - gen_dy) + + # condense into one tensor and avg + return tf.reduce_mean(grad_diff_x ** alpha + grad_diff_y ** alpha) + diff --git a/Codes/models.py b/Codes/models.py new file mode 100644 index 0000000..8c20134 --- /dev/null +++ b/Codes/models.py @@ -0,0 +1,44 @@ +import tensorflow as tf + +import unet +import pix2pix + +from flownet2.src.flowlib import flow_to_image +from flownet2.src.flownet_sd.flownet_sd import FlowNetSD # Ok +from flownet2.src.training_schedules import LONG_SCHEDULE +from flownet2.src.net import Mode + + +slim = tf.contrib.slim + + +def generator(inputs, layers, features_root=64, filter_size=3, pool_size=2, output_channel=3): + return unet.unet(inputs, layers, features_root, filter_size, pool_size, output_channel) + + +def discriminator(inputs, num_filers=(128, 256, 512, 512)): + logits, end_points = pix2pix.pix2pix_discriminator(inputs, num_filers) + return logits, end_points['predictions'] + + +def flownet(input_a, input_b, height, width, reuse=None): + net = FlowNetSD(mode=Mode.TEST) + # train preds flow + input_a = (input_a + 1.0) / 2.0 # flownet receives image with color space in [0, 1] + input_b = (input_b + 1.0) / 2.0 # flownet receives image with color space in [0, 1] + # input size is 384 x 512 + input_a = tf.image.resize_images(input_a, [height, width]) + input_b = tf.image.resize_images(input_b, [height, width]) + flows = net.model( + inputs={'input_a': input_a, 'input_b': input_b}, + training_schedule=LONG_SCHEDULE, + trainable=False, reuse=reuse + ) + return flows['flow'] + + +def initialize_flownet(sess, checkpoint): + flownet_vars = slim.get_variables_to_restore(include=['FlowNetSD']) + flownet_saver = tf.train.Saver(flownet_vars) + print('FlownetSD restore from {}!'.format(checkpoint)) + flownet_saver.restore(sess, checkpoint) diff --git a/Codes/models/download_pretrains.sh b/Codes/models/download_pretrains.sh new file mode 100644 index 0000000..08e58ec --- /dev/null +++ b/Codes/models/download_pretrains.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +echo "Downloading trained models on ped1, ped2 and avenue datasets ....." + +wget "https://ofhz9a.bn.files.1drv.com/y4mHfGdUxGoa7NnnI-eIlTqInymvmHyDOSGGw5zKM08jOGukHKdYdxmtZiEEh-rCAWK7oTDTstQ5bKazvjdyTtsIUW7zxcKnVgIsgZg6DpEb-Qdq83Zmnnw6nv7pX5HhiOkMxc42CLl65QK0A2Mv1Cmj-062Pyodm-Mt5r24Id3_glS0NT6BdvAp7-VbevkXygnmXQrcXRQU6d0y1cHlZJ2ig/pretrains.tar.gz" +tar -xvf pretrains.tar.gz +rm pretrains.tar.gz + +echo "Download pretrains successfully..." + + diff --git a/Codes/pix2pix.py b/Codes/pix2pix.py new file mode 100644 index 0000000..941c8fc --- /dev/null +++ b/Codes/pix2pix.py @@ -0,0 +1,274 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""Implementation of the Image-to-Image Translation model. +This network represents a port of the following work: + Image-to-Image Translation with Conditional Adversarial Networks + Phillip Isola, Jun-Yan Zhu, Tinghui Zhou and Alexei A. Efros + Arxiv, 2017 + https://phillipi.github.io/pix2pix/ +A reference implementation written in Lua can be found at: +https://github.com/phillipi/pix2pix/blob/master/models.lua +""" +import collections +import functools + +import tensorflow as tf + +layers = tf.contrib.layers + + +def pix2pix_arg_scope(): + """Returns a default argument scope for isola_net. + Returns: + An arg scope. + """ + # These parameters come from the online port, which don't necessarily match + # those in the paper. + # TODO(nsilberman): confirm these values with Philip. + instance_norm_params = { + 'center': True, + 'scale': True, + 'epsilon': 0.00001, + } + + with tf.contrib.framework.arg_scope( + [layers.conv2d, layers.conv2d_transpose], + normalizer_fn=layers.instance_norm, + normalizer_params=instance_norm_params, + weights_initializer=tf.random_normal_initializer(0, 0.02)) as sc: + return sc + + +def upsample(net, num_outputs, kernel_size, method='nn_upsample_conv'): + """Upsamples the given inputs. + Args: + net: A `Tensor` of size [batch_size, height, width, filters]. + num_outputs: The number of output filters. + kernel_size: A list of 2 scalars or a 1x2 `Tensor` indicating the scale, + relative to the inputs, of the output dimensions. For example, if kernel + size is [2, 3], then the output height and width will be twice and three + times the input size. + method: The upsampling method. + Returns: + An `Tensor` which was upsampled using the specified method. + Raises: + ValueError: if `method` is not recognized. + """ + net_shape = tf.shape(net) + height = net_shape[1] + width = net_shape[2] + + if method == 'nn_upsample_conv': + net = tf.image.resize_nearest_neighbor( + net, [kernel_size[0] * height, kernel_size[1] * width]) + net = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None) + elif method == 'conv2d_transpose': + net = layers.conv2d_transpose( + net, num_outputs, [4, 4], stride=kernel_size, activation_fn=None) + else: + raise ValueError('Unknown method: [%s]', method) + + return net + + +class Block( + collections.namedtuple('Block', ['num_filters', 'decoder_keep_prob'])): + """Represents a single block of encoder and decoder processing. + The Image-to-Image translation paper works a bit differently than the original + U-Net model. In particular, each block represents a single operation in the + encoder which is concatenated with the corresponding decoder representation. + A dropout layer follows the concatenation and convolution of the concatenated + features. + """ + pass + + +def _default_generator_blocks(): + """Returns the default generator block definitions. + Returns: + A list of generator blocks. 
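+    Each Block pairs an encoder filter count with the dropout keep probability used at the
+    mirrored decoder stage; a decoder_keep_prob of 0 disables dropout for that stage.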
+ """ + return [ + Block(64, 0.5), + Block(128, 0.5), + Block(256, 0.5), + Block(512, 0), + Block(512, 0), + Block(512, 0), + Block(512, 0), + ] + + +def pix2pix_generator(net, + num_outputs, + blocks=None, + upsample_method='nn_upsample_conv', + is_training=False): # pylint: disable=unused-argument + """Defines the network architecture. + Args: + net: A `Tensor` of size [batch, height, width, channels]. Note that the + generator currently requires square inputs (e.g. height=width). + num_outputs: The number of (per-pixel) outputs. + blocks: A list of generator blocks or `None` to use the default generator + definition. + upsample_method: The method of upsampling images, one of 'nn_upsample_conv' + or 'conv2d_transpose' + is_training: Whether or not we're in training or testing mode. + Returns: + A `Tensor` representing the model output and a dictionary of model end + points. + Raises: + ValueError: if the input heights do not match their widths. + """ + end_points = {} + + blocks = blocks or _default_generator_blocks() + + input_size = net.get_shape().as_list() + height, width = input_size[1], input_size[2] + if height != width: + raise ValueError('The input height must match the input width.') + + input_size[3] = num_outputs + + upsample_fn = functools.partial(upsample, method=upsample_method) + + encoder_activations = [] + + ########### + # Encoder # + ########### + with tf.variable_scope('encoder'): + with tf.contrib.framework.arg_scope( + [layers.conv2d], + kernel_size=[4, 4], + stride=2, + activation_fn=tf.nn.leaky_relu): + + for block_id, block in enumerate(blocks): + # No normalizer for the first encoder layers as per 'Image-to-Image', + # Section 5.1.1 + if block_id == 0: + # First layer doesn't use normalizer_fn + net = layers.conv2d(net, block.num_filters, normalizer_fn=None) + elif block_id < len(blocks) - 1: + net = layers.conv2d(net, block.num_filters) + else: + # Last layer doesn't use activation_fn nor normalizer_fn + net = layers.conv2d( + net, block.num_filters, activation_fn=None, normalizer_fn=None) + + encoder_activations.append(net) + end_points['encoder%d' % block_id] = net + + ########### + # Decoder # + ########### + reversed_blocks = list(blocks) + reversed_blocks.reverse() + + with tf.variable_scope('decoder'): + # Dropout is used at both train and test time as per 'Image-to-Image', + # Section 2.1 (last paragraph). + with tf.contrib.framework.arg_scope([layers.dropout], is_training=is_training): + + for block_id, block in enumerate(reversed_blocks): + if block_id > 0: + net = tf.concat([net, encoder_activations[-block_id - 1]], axis=3) + + # The Relu comes BEFORE the upsample op: + net = tf.nn.relu(net) + net = upsample_fn(net, block.num_filters, [2, 2]) + if block.decoder_keep_prob > 0: + net = layers.dropout(net, keep_prob=block.decoder_keep_prob) + end_points['decoder%d' % block_id] = net + + with tf.variable_scope('output'): + logits = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None) + # print(logits) + # logits = tf.reshape(logits, input_size) + + end_points['logits'] = logits + end_points['predictions'] = tf.tanh(logits) + + return logits, end_points + + +def pix2pix_discriminator(net, num_filters, padding=2, is_training=False): + """Creates the Image2Image Translation Discriminator. + Args: + net: A `Tensor` of size [batch_size, height, width, channels] representing + the input. + num_filters: A list of the filters in the discriminator. The length of the + list determines the number of layers in the discriminator. 
+ padding: Amount of reflection padding applied before each convolution. + is_training: Whether or not the model is training or testing. + Returns: + A logits `Tensor` of size [batch_size, N, N, 1] where N is the number of + 'patches' we're attempting to discriminate and a dictionary of model end + points. + """ + del is_training + end_points = {} + + num_layers = len(num_filters) + + def padded(net, scope): + if padding: + with tf.variable_scope(scope): + spatial_pad = tf.constant( + [[0, 0], [padding, padding], [padding, padding], [0, 0]], + dtype=tf.int32) + return tf.pad(net, spatial_pad, 'REFLECT') + else: + return net + + with tf.contrib.framework.arg_scope( + [layers.conv2d], + kernel_size=[4, 4], + stride=2, + padding='valid', + activation_fn=tf.nn.leaky_relu): + + # No normalization on the input layer. + net = layers.conv2d( + padded(net, 'conv0'), num_filters[0], normalizer_fn=None, scope='conv0') + + end_points['conv0'] = net + + for i in range(1, num_layers - 1): + net = layers.conv2d( + padded(net, 'conv%d' % i), num_filters[i], scope='conv%d' % i) + end_points['conv%d' % i] = net + + # Stride 1 on the last layer. + net = layers.conv2d( + padded(net, 'conv%d' % (num_layers - 1)), + num_filters[-1], + stride=1, + scope='conv%d' % (num_layers - 1)) + end_points['conv%d' % (num_layers - 1)] = net + + # 1-dim logits, stride 1, no activation, no normalization. + logits = layers.conv2d( + padded(net, 'conv%d' % num_layers), + 1, + stride=1, + activation_fn=None, + normalizer_fn=None, + scope='conv%d' % num_layers) + end_points['logits'] = logits + end_points['predictions'] = tf.sigmoid(logits) + return logits, end_points diff --git a/Codes/requirements.txt b/Codes/requirements.txt new file mode 100644 index 0000000..91d2206 --- /dev/null +++ b/Codes/requirements.txt @@ -0,0 +1,9 @@ +numpy==1.14.1 +scipy==1.0.0 +matplotlib==2.1.2 +tensorflow==1.4.1 +tensorflow_gpu==1.4.1 +Pillow==5.0.0 +pypng==0.0.18 +scikit_learn==0.19.1 +opencv-python==3.2.0.6 diff --git a/Codes/runner.sh b/Codes/runner.sh new file mode 100644 index 0000000..f0b545f --- /dev/null +++ b/Codes/runner.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +/home/liuwen/ssd/datasets/ped2/training/frames +/home/liuwen/ssd/datasets/ped2/testing/frames + +python train.py --dataset ped2 \ + --train_folder ../Data/ped2/training/frames \ + --test_folder ../Data/ped2/testing/frames \ + --gpu 0 \ + --iters 80000 + + +python inference.py --dataset ped2 \ + --test_folder /home/liuwen/ssd/datasets/ped2/testing/frames \ + --gpu 3 \ + --snapshot_dir models/pretrains/ped2 + + +python train.py --dataset avenue \ + --train_folder ../Data/avenue/training/frames \ + --test_folder ../Data/avenue/testing/frames \ + --gpu 2 \ + --iters 80000 + +python inference.py --dataset avenue \ + --test_folder ../Data/avenue/testing/frames \ + --gpu 3 + + +python train.py --dataset ped1 \ + --train_folder ../Data/ped1/training/frames \ + --test_folder ../Data/ped1/testing/frames \ + --gpu 2 \ + --iters 80000 + +python inference.py --dataset ped1 \ + --test_folder ../Data/ped1/testing/frames \ + --gpu 3 + +python train.py --dataset ped1 \ + --train_folder ../Data/ped1/training/frames \ + --test_folder ../Data/ped1/testing/frames \ + --gpu 0 \ + --iters 80000 \ + --config training_hyper_params/hyper_params_lp_0.ini + +python inference.py --dataset ped1 \ + --test_folder ../Data/ped1/testing/frames \ + --gpu 1 \ + --config training_hyper_params/hyper_params_lp_0.ini + + +python inference.py --dataset ped2 \ + --test_folder /home/liuwen/ssd/datasets/ped2/testing/frames 
\ + --gpu 1 \ + --snapshot_dir models/pretrains/ped2
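+# NOTE: models/pretrains/<dataset> is assumed to hold the checkpoints unpacked by Codes/models/download_pretrains.sh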
\ No newline at end of file diff --git a/Codes/train.py b/Codes/train.py new file mode 100644 index 0000000..42a8fc9 --- /dev/null +++ b/Codes/train.py @@ -0,0 +1,215 @@ +import tensorflow as tf +import os + +from models import generator, discriminator, flownet, initialize_flownet +from loss_functions import intensity_loss, gradient_loss +from utils import DataLoader, load, save, psnr_error +from constant import const + + +os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID" +os.environ['CUDA_VISIBLE_DEVICES'] = const.GPU + +dataset_name = const.DATASET +train_folder = const.TRAIN_FOLDER +test_folder = const.TEST_FOLDER + +batch_size = const.BATCH_SIZE +iterations = const.ITERATIONS +num_his = const.NUM_HIS +height, width = 256, 256 +flow_height, flow_width = const.FLOW_HEIGHT, const.FLOW_WIDTH + +l_num = const.L_NUM +alpha_num = const.ALPHA_NUM +lam_lp = const.LAM_LP +lam_gdl = const.LAM_GDL +lam_adv = const.LAM_ADV +lam_flow = const.LAM_FLOW +adversarial = (lam_adv != 0) + +summary_dir = const.SUMMARY_DIR +snapshot_dir = const.SNAPSHOT_DIR + + +print(const) + +# define dataset +with tf.name_scope('dataset'): + train_loader = DataLoader(train_folder, resize_height=height, resize_width=width) + train_dataset = train_loader(batch_size=batch_size, time_steps=num_his, num_pred=1) + + train_it = train_dataset.make_one_shot_iterator() + train_videos_clips_tensor = train_it.get_next() + train_videos_clips_tensor.set_shape([batch_size, height, width, 3*(num_his + 1)]) + + train_inputs = train_videos_clips_tensor[..., 0:num_his*3] + train_gt = train_videos_clips_tensor[..., -3:] + + print('train inputs = {}'.format(train_inputs)) + print('train prediction gt = {}'.format(train_gt)) + + test_loader = DataLoader(test_folder, resize_height=height, resize_width=width) + test_dataset = test_loader(batch_size=batch_size, time_steps=num_his, num_pred=1) + test_it = test_dataset.make_one_shot_iterator() + test_videos_clips_tensor = test_it.get_next() + test_videos_clips_tensor.set_shape([batch_size, height, width, 3*(num_his + 1)]) + + test_inputs = test_videos_clips_tensor[..., 0:num_his*3] + test_gt = test_videos_clips_tensor[..., -3:] + + print('test inputs = {}'.format(test_inputs)) + print('test prediction gt = {}'.format(test_gt)) + +# define training generator function +with tf.variable_scope('generator', reuse=None): + print('training = {}'.format(tf.get_variable_scope().name)) + train_outputs = generator(train_inputs, layers=4, output_channel=3) + train_psnr_error = psnr_error(gen_frames=train_outputs, gt_frames=train_gt) + +# define testing generator function +with tf.variable_scope('generator', reuse=True): + print('testing = {}'.format(tf.get_variable_scope().name)) + test_outputs = generator(test_inputs, layers=4, output_channel=3) + test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt) + + +# define intensity loss +if lam_lp != 0: + lp_loss = intensity_loss(gen_frames=train_outputs, gt_frames=train_gt, l_num=l_num) +else: + lp_loss = tf.constant(0.0, dtype=tf.float32) + + +# define gdl loss +if lam_gdl != 0: + gdl_loss = gradient_loss(gen_frames=train_outputs, gt_frames=train_gt, alpha=alpha_num) +else: + gdl_loss = tf.constant(0.0, dtype=tf.float32) + + +# define flow loss +if lam_flow != 0: + train_gt_flow = flownet(input_a=train_inputs[..., -3:], input_b=train_gt, + height=flow_height, width=flow_width, reuse=None) + train_pred_flow = flownet(input_a=train_inputs[..., -3:], input_b=train_outputs, + height=flow_height, width=flow_width, reuse=True) + flow_loss = 
tf.reduce_mean(tf.abs(train_gt_flow - train_pred_flow)) +else: + flow_loss = tf.constant(0.0, dtype=tf.float32) + + +# define adversarial loss +if adversarial: + with tf.variable_scope('discriminator', reuse=None): + real_logits, real_outputs = discriminator(inputs=train_gt) + with tf.variable_scope('discriminator', reuse=True): + fake_logits, fake_outputs = discriminator(inputs=train_outputs) + + print('real_outputs = {}'.format(real_outputs)) + print('fake_outputs = {}'.format(fake_outputs)) + + adv_loss = tf.reduce_mean(tf.square(fake_outputs - 1) / 2) + dis_loss = tf.reduce_mean(tf.square(real_outputs - 1) / 2) + tf.reduce_mean(tf.square(fake_outputs) / 2) +else: + adv_loss = tf.constant(0.0, dtype=tf.float32) + dis_loss = tf.constant(0.0, dtype=tf.float32) + + +with tf.name_scope('training'): + g_loss = tf.add_n([lp_loss * lam_lp, gdl_loss * lam_gdl, adv_loss * lam_adv, flow_loss * lam_flow], name='g_loss') + + g_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='g_step') + g_lrate = tf.train.piecewise_constant(g_step, boundaries=const.LRATE_G_BOUNDARIES, values=const.LRATE_G) + g_optimizer = tf.train.AdamOptimizer(learning_rate=g_lrate, name='g_optimizer') + g_vars = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator') + + g_train_op = g_optimizer.minimize(g_loss, global_step=g_step, var_list=g_vars, name='g_train_op') + + if adversarial: + # training discriminator + d_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='d_step') + d_lrate = tf.train.piecewise_constant(d_step, boundaries=const.LRATE_D_BOUNDARIES, values=const.LRATE_D) + d_optimizer = tf.train.AdamOptimizer(learning_rate=d_lrate, name='g_optimizer') + d_vars = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator') + + d_train_op = d_optimizer.minimize(dis_loss, global_step=d_step, var_list=d_vars, name='d_optimizer') + else: + d_step = None + d_lrate = None + d_train_op = None + +# add all to summaries +tf.summary.scalar(tensor=train_psnr_error, name='train_psnr_error') +tf.summary.scalar(tensor=test_psnr_error, name='test_psnr_error') +tf.summary.scalar(tensor=g_loss, name='g_loss') +tf.summary.scalar(tensor=adv_loss, name='adv_loss') +tf.summary.scalar(tensor=dis_loss, name='dis_loss') +tf.summary.image(tensor=train_outputs, name='train_outputs') +tf.summary.image(tensor=train_gt, name='train_gt') +tf.summary.image(tensor=test_outputs, name='test_outputs') +tf.summary.image(tensor=test_gt, name='test_gt') +summary_op = tf.summary.merge_all() + +config = tf.ConfigProto() +config.gpu_options.allow_growth = True +with tf.Session(config=config) as sess: + # summaries + summary_writer = tf.summary.FileWriter(summary_dir, graph=sess.graph) + + # initialize weights + sess.run(tf.global_variables_initializer()) + print('Init successfully!') + + if lam_flow != 0: + # initialize flownet + initialize_flownet(sess, const.FLOWNET_CHECKPOINT) + + # tf saver + saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None) + restore_var = [v for v in tf.global_variables()] + loader = tf.train.Saver(var_list=restore_var) + if os.path.isdir(snapshot_dir): + ckpt = tf.train.get_checkpoint_state(snapshot_dir) + if ckpt and ckpt.model_checkpoint_path: + load(loader, sess, ckpt.model_checkpoint_path) + else: + print('No checkpoint file found.') + else: + load(loader, sess, snapshot_dir) + + _step, _loss, _summaries = 0, None, None + while _step < iterations: + try: + if adversarial: + print('Training discriminator...') + _, _d_lr, _d_step, _dis_loss = 
sess.run([d_train_op, d_lrate, d_step, dis_loss]) + else: + _d_step = 0 + _d_lr = 0 + _dis_loss = 0 + + print('Training generator...') + _, _g_lr, _step, _lp_loss, _gdl_loss, _adv_loss, _flow_loss, _g_loss, _train_psnr, _summaries = sess.run( + [g_train_op, g_lrate, g_step, lp_loss, gdl_loss, adv_loss, flow_loss, g_loss, train_psnr_error, summary_op]) + + if _step % 10 == 0: + print('DiscriminatorModel: Step {} | Global Loss: {:.6f}, lr = {:.6f}'.format(_d_step, _dis_loss, _d_lr)) + print('GeneratorModel : Step {}, lr = {:.6f}'.format(_step, _g_lr)) + print(' Global Loss : ', _g_loss) + print(' intensity Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_lp_loss, lam_lp, _lp_loss * lam_lp)) + print(' gradient Loss : ({:.4f} * {:.4f} = {:.4f})'.format( _gdl_loss, lam_gdl, _gdl_loss * lam_gdl)) + print(' adversarial Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_adv_loss, lam_adv, _adv_loss * lam_adv)) + print(' flownet Loss : ({:.4f} * {:.4f} = {:.4f})'.format(_flow_loss, lam_flow, _flow_loss * lam_flow)) + print(' PSNR Error : ', _train_psnr) + if _step % 100 == 0: + summary_writer.add_summary(_summaries, global_step=_step) + print('Save summaries...') + + if _step % 1000 == 0: + save(saver, sess, snapshot_dir, _step) + + except tf.errors.OutOfRangeError: + print('Finish successfully!') + save(saver, sess, snapshot_dir, _step) + break diff --git a/Codes/training_hyper_params/hyper_params.ini b/Codes/training_hyper_params/hyper_params.ini new file mode 100644 index 0000000..99dbf00 --- /dev/null +++ b/Codes/training_hyper_params/hyper_params.ini @@ -0,0 +1,103 @@ +[ped2] +# for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively) +L_NUM = 2 +# the power to which each gradient term is raised in GDL loss +ALPHA_NUM = 1 +# the percentage of the adversarial loss to use in the combined loss +LAM_ADV = 0.05 +# the percentage of the lp loss to use in the combined loss +LAM_LP = 1 +# the percentage of the GDL loss to use in the combined loss +LAM_GDL = 1 +# the percentage of the different frame loss +LAM_FLOW = 2 + +LRATE_G = [0.0001, 0.00001] +LRATE_G_BOUNDARIES = [7000] + +LRATE_D = [0.00001, 0.000001] +LRATE_D_BOUNDARIES = [7000] + +[ped1] +# for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively) +L_NUM = 2 +# the power to which each gradient term is raised in GDL loss +ALPHA_NUM = 1 +# the percentage of the adversarial loss to use in the combined loss +LAM_ADV = 0.05 +# the percentage of the lp loss to use in the combined loss +LAM_LP = 1 +# the percentage of the GDL loss to use in the combined loss +LAM_GDL = 1 +# the percentage of the different frame loss +LAM_FLOW = 0.01 + +LRATE_G = [0.0001, 0.00001] +LRATE_G_BOUNDARIES = [40000] + +LRATE_D = [0.00001, 0.000001] +LRATE_D_BOUNDARIES = [40000] + + +[avenue] +# for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively) +L_NUM = 2 +# the power to which each gradient term is raised in GDL loss +ALPHA_NUM = 1 +# the percentage of the adversarial loss to use in the combined loss +LAM_ADV = 0.05 +# the percentage of the lp loss to use in the combined loss, +# we found in smaller lp is slightly better in avenue, but not too much difference. +LAM_LP = 0 +# the percentage of the GDL loss to use in the combined loss +LAM_GDL = 1 +# the percentage of the different frame loss +LAM_FLOW = 2 + +LRATE_G = [0.0002, 0.00002] +LRATE_G_BOUNDARIES = [100000] + +LRATE_D = [0.00002, 0.000002] +LRATE_D_BOUNDARIES = [100000] + + +[shanghaitech] +# for lp loss. 
e.g, 1 or 2 for l1 and l2 loss, respectively) +L_NUM = 2 +# the power to which each gradient term is raised in GDL loss +ALPHA_NUM = 1 +# the percentage of the adversarial loss to use in the combined loss +LAM_ADV = 0.05 +# the percentage of the lp loss to use in the combined loss +LAM_LP = 1 +# the percentage of the GDL loss to use in the combined loss +LAM_GDL = 1 +# the percentage of the different frame loss +LAM_FLOW = 2 + +LRATE_G = [0.0002, 0.00002] +LRATE_G_BOUNDARIES = [50000] + +LRATE_D = [0.00002, 0.000002] +LRATE_D_BOUNDARIES = [50000] + + +[toydata] +# for lp loss. e.g, 1 or 2 for l1 and l2 loss, respectively) +L_NUM = 2 +# the power to which each gradient term is raised in GDL loss +ALPHA_NUM = 1 +# the percentage of the adversarial loss to use in the combined loss +LAM_ADV = 0.05 +# the percentage of the lp loss to use in the combined loss +LAM_LP = 1 +# the percentage of the GDL loss to use in the combined loss +LAM_GDL = 1 +# the percentage of the different frame loss +LAM_FLOW = 2 + +LRATE_G = [0.0001, 0.00001] +LRATE_G_BOUNDARIES = [7000] + +LRATE_D = [0.00001, 0.000001] +LRATE_D_BOUNDARIES = [7000] diff --git a/Codes/unet.py b/Codes/unet.py new file mode 100644 index 0000000..ac4c6aa --- /dev/null +++ b/Codes/unet.py @@ -0,0 +1,42 @@ +import tensorflow as tf +from tensorflow.contrib.layers import conv2d, max_pool2d, conv2d_transpose + + +def unet(inputs, layers, features_root=64, filter_size=3, pool_size=2, output_channel=1): + """ + :param inputs: input tensor, shape[None, height, width, channel] + :param layers: number of layers + :param features_root: number of features in the first layer + :param filter_size: size of each conv layer + :param pool_size: size of each max pooling layer + :param output_channel: number of channel for output tensor + :return: a tensor, shape[None, height, width, output_channel] + """ + + in_node = inputs + conv = [] + for layer in range(0, layers): + features = 2**layer*features_root + + conv1 = conv2d(inputs=in_node, num_outputs=features, kernel_size=filter_size) + conv2 = conv2d(inputs=conv1, num_outputs=features, kernel_size=filter_size) + conv.append(conv2) + + if layer < layers - 1: + in_node = max_pool2d(inputs=conv2, kernel_size=pool_size, padding='SAME') + # in_node = conv2d(inputs=conv2, num_outputs=features, kernel_size=filter_size, stride=2) + + in_node = conv[-1] + + for layer in range(layers-2, -1, -1): + features = 2**(layer+1)*features_root + + h_deconv = conv2d_transpose(inputs=in_node, num_outputs=features//2, kernel_size=pool_size, stride=pool_size) + h_deconv_concat = tf.concat([conv[layer], h_deconv], axis=3) + + conv1 = conv2d(inputs=h_deconv_concat, num_outputs=features//2, kernel_size=filter_size) + in_node = conv2d(inputs=conv1, num_outputs=features//2, kernel_size=filter_size) + + output = conv2d(inputs=in_node, num_outputs=output_channel, kernel_size=filter_size, activation_fn=None) + output = tf.tanh(output) + return output diff --git a/Codes/utils.py b/Codes/utils.py new file mode 100644 index 0000000..efeab8e --- /dev/null +++ b/Codes/utils.py @@ -0,0 +1,227 @@ +import tensorflow as tf +import numpy as np +from collections import OrderedDict +import os +import glob +import cv2 + + +rng = np.random.RandomState(2017) + + +def np_load_frame(filename, resize_height, resize_width): + image_decoded = cv2.imread(filename) + image_resized = cv2.resize(image_decoded, (resize_width, resize_height)) + image_resized = image_resized.astype(dtype=np.float32) + image_resized = (image_resized / 127.5) - 1.0 + return 
image_resized + + +class DataLoader(object): + def __init__(self, video_folder, resize_height=256, resize_width=256): + self.dir = video_folder + self.videos = {} + self._resize_height = resize_height + self._resize_width = resize_width + self.setup() + + def __call__(self, batch_size, time_steps, num_pred=1): + video_info_list = list(self.videos.values()) + num_videos = len(video_info_list) + + clip_length = time_steps + num_pred + resize_height, resize_width = self._resize_height, self._resize_width + + def video_clip_generator(): + v_id = -1 + while True: + v_id = (v_id + 1) % num_videos + + video_info = video_info_list[v_id] + start = rng.randint(0, video_info['length'] - clip_length) + video_clip = [] + for frame_id in range(start, start + clip_length): + video_clip.append(np_load_frame(video_info['frame'][frame_id], resize_height, resize_width)) + video_clip = np.concatenate(video_clip, axis=2) + + yield video_clip + + # video clip paths + dataset = tf.data.Dataset.from_generator(generator=video_clip_generator, + output_types=tf.float32, + output_shapes=[resize_height, resize_width, clip_length * 3]) + print('generator dataset, {}'.format(dataset)) + dataset = dataset.prefetch(buffer_size=1000) + dataset = dataset.shuffle(buffer_size=1000).batch(batch_size) + print('epoch dataset, {}'.format(dataset)) + + return dataset + + def __getitem__(self, video_name): + assert video_name in self.videos.keys(), 'video = {} is not in {}!'.format(video_name, self.videos.keys()) + return self.videos[video_name] + + def setup(self): + videos = glob.glob(os.path.join(self.dir, '*')) + for video in sorted(videos): + video_name = video.split('/')[-1] + self.videos[video_name] = {} + self.videos[video_name]['path'] = video + self.videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg')) + self.videos[video_name]['frame'].sort() + self.videos[video_name]['length'] = len(self.videos[video_name]['frame']) + + def get_video_clips(self, video, start, end): + # assert video in self.videos, 'video = {} must in {}!'.format(video, self.videos.keys()) + # assert start >= 0, 'start = {} must >=0!'.format(start) + # assert end <= self.videos[video]['length'], 'end = {} must <= {}'.format(video, self.videos[video]['length']) + + batch = [] + for i in range(start, end): + image = np_load_frame(self.videos[video]['frame'][i], self._resize_height, self._resize_width) + batch.append(image) + + return np.concatenate(batch, axis=2) + + # def get_video_clips(self, video_name, start, end): + # video_idx = np.arange(start, end) + # video_clip = np.empty(shape=[self._resize_height, self._resize_height, 3*len(video_idx)], dtype=np.float32) + # for idx, v_idx in enumerate(video_idx): + # filename = self.videos[video_name]['frame'][v_idx] + # video_clip[..., idx*3:(idx+1)*3] = np_load_frame(filename, self._resize_height, self._resize_width) + # + # return video_clip + + +def log10(t): + """ + Calculates the base-10 log of each element in t. + + @param t: The tensor from which to calculate the base-10 log. + + @return: A tensor with the base-10 log of each element in t. + """ + + numerator = tf.log(t) + denominator = tf.log(tf.constant(10, dtype=numerator.dtype)) + return numerator / denominator + + +def psnr_error(gen_frames, gt_frames): + """ + Computes the Peak Signal to Noise Ratio error between the generated images and the ground + truth images. + + @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the + generator model. 
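+        Values are assumed to lie in [-1, 1]; both gen_frames and gt_frames are rescaled to
+        [0, 1] inside this function before the PSNR is computed.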
+ @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for + each frame in gen_frames. + + @return: A scalar tensor. The mean Peak Signal to Noise Ratio error over each frame in the + batch. + """ + shape = tf.shape(gen_frames) + num_pixels = tf.to_float(shape[1] * shape[2] * shape[3]) + gt_frames = (gt_frames + 1.0) / 2.0 + gen_frames = (gen_frames + 1.0) / 2.0 + square_diff = tf.square(gt_frames - gen_frames) + + batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(square_diff, [1, 2, 3]))) + return tf.reduce_mean(batch_errors) + + +def sharp_diff_error(gen_frames, gt_frames, channels=3): + """ + Computes the Sharpness Difference error between the generated images and the ground truth + images. + + @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated by the + generator model. + @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames for + each frame in gen_frames. + @param channels: The number of channels, 3 is RGB and 1 is Gray, default is 3. + + @return: A scalar tensor. The Sharpness Difference error over each frame in the batch. + """ + shape = tf.shape(gen_frames) + num_pixels = tf.to_float(shape[1] * shape[2] * shape[3]) + + # gradient difference + # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively. + # TODO: Could this be simplified with one filter [[-1, 2], [0, -1]]? + pos = tf.constant(np.identity(channels), dtype=tf.float32) + neg = -1 * pos + filter_x = tf.expand_dims(tf.stack([neg, pos]), 0) # [-1, 1] + filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)]) # [[1],[-1]] + strides = [1, 1, 1, 1] # stride of (1, 1) + padding = 'SAME' + + gen_dx = tf.abs(tf.nn.conv2d(gen_frames, filter_x, strides, padding=padding)) + gen_dy = tf.abs(tf.nn.conv2d(gen_frames, filter_y, strides, padding=padding)) + gt_dx = tf.abs(tf.nn.conv2d(gt_frames, filter_x, strides, padding=padding)) + gt_dy = tf.abs(tf.nn.conv2d(gt_frames, filter_y, strides, padding=padding)) + + gen_grad_sum = gen_dx + gen_dy + gt_grad_sum = gt_dx + gt_dy + + grad_diff = tf.abs(gt_grad_sum - gen_grad_sum) + + batch_errors = 10 * log10(1 / ((1 / num_pixels) * tf.reduce_sum(grad_diff, [1, 2, 3]))) + return tf.reduce_mean(batch_errors) + + +def diff_mask(gen_frames, gt_frames, min_value=-1, max_value=1): + # normalize to [0, 1] + delta = max_value - min_value + gen_frames = (gen_frames - min_value) / delta + gt_frames = (gt_frames - min_value) / delta + + gen_gray_frames = tf.image.rgb_to_grayscale(gen_frames) + gt_gray_frames = tf.image.rgb_to_grayscale(gt_frames) + + diff = tf.abs(gen_gray_frames - gt_gray_frames) + return diff + + +def load(saver, sess, ckpt_path): + saver.restore(sess, ckpt_path) + print("Restored model parameters from {}".format(ckpt_path)) + + +def save(saver, sess, logdir, step): + model_name = 'model.ckpt' + checkpoint_path = os.path.join(logdir, model_name) + if not os.path.exists(logdir): + os.makedirs(logdir) + saver.save(sess, checkpoint_path, global_step=step) + print('The checkpoint has been created.') + + +# if __name__ == '__main__': +# os.environ['CUDA_DEVICES_ORDER'] = "PCI_BUS_ID" +# os.environ['CUDA_VISIBLE_DEVICES'] = '0' +# +# data_loader = DataLoader('/home/liuwen/ssd/datasets/avenue/training/frames') +# dataset, epoch_size = data_loader(10, 4, 1, 3, 1) +# +# # debug +# iteration = dataset.make_one_shot_iterator() +# batch_video_clip_tensor = iteration.get_next() +# +# config = tf.ConfigProto() +# 
config.gpu_options.allow_growth = True +# with tf.Session(config=config) as sess: +# # batch_video_clip = sess.run(next(it)) +# +# for i in range(100): +# batch_video_clip = sess.run(batch_video_clip_tensor) +# # print(batch_video_clip.shape) +# +# for vid, video_clip in enumerate(batch_video_clip): +# for fid, frame in enumerate(video_clip): +# print(i, vid, fid) +# cv2.imshow('visualization', frame + 0.5) +# cv2.waitKey(100) + + +
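For reference, a minimal sketch of how the DataLoader and psnr_error utilities above can be exercised together. It assumes it is run from the Codes directory, that '../Data/ped2/training/frames' is a placeholder path containing one sub-folder of *.jpg frames per video, and it simply mirrors the clip layout used in train.py (4 history frames plus 1 predicted frame):

import tensorflow as tf
from utils import DataLoader, psnr_error

# Build the tf.data pipeline the same way train.py does, but with a tiny batch.
loader = DataLoader('../Data/ped2/training/frames', resize_height=256, resize_width=256)
dataset = loader(batch_size=2, time_steps=4, num_pred=1)

clips = dataset.make_one_shot_iterator().get_next()   # shape [2, 256, 256, 3 * 5], values in [-1, 1]
first_frame = clips[..., 0:3]                         # oldest frame of each clip
last_frame = clips[..., -3:]                          # frame the generator would have to predict

# PSNR of the trivial "copy the oldest frame" predictor against the clip's last frame.
baseline_psnr = psnr_error(gen_frames=first_frame, gt_frames=last_frame)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    print('copy-first-frame PSNR: {:.2f} dB'.format(sess.run(baseline_psnr)))

The same psnr_error op is what train.py reports as train_psnr_error once a trained generator supplies the predicted frames instead of this copy baseline.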
