| author | tingchunw <tingchunw@nvidia.com> | 2017-12-04 16:52:46 -0800 |
|---|---|---|
| committer | tingchunw <tingchunw@nvidia.com> | 2017-12-04 16:52:46 -0800 |
| commit | 9054cf9b0c327a5077fd0793abe178f400da3315 (patch) | |
| tree | 3c69c07bdcba86c47d8442648fd69c0434e04136 /train.py | |
| parent | f9e9999541d67a908a169cc88407675133130e1f (diff) | |
first commit
Diffstat (limited to 'train.py')
| -rwxr-xr-x | train.py | 118 |
1 file changed, 118 insertions, 0 deletions
diff --git a/train.py b/train.py
new file mode 100755
index 0000000..4965481
--- /dev/null
+++ b/train.py
@@ -0,0 +1,118 @@
+### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
+### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
+import time
+from collections import OrderedDict
+from options.train_options import TrainOptions
+from data.data_loader import CreateDataLoader
+from models.models import create_model
+import util.util as util
+from util.visualizer import Visualizer
+import os
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+opt = TrainOptions().parse()
+iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
+if opt.continue_train:
+    try:
+        start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int)
+    except:
+        start_epoch, epoch_iter = 1, 0
+    print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter))
+else:
+    start_epoch, epoch_iter = 1, 0
+
+if opt.debug:
+    opt.display_freq = 1
+    opt.print_freq = 1
+    opt.niter = 1
+    opt.niter_decay = 0
+    opt.max_dataset_size = 10
+
+data_loader = CreateDataLoader(opt)
+dataset = data_loader.load_data()
+dataset_size = len(data_loader)
+print('#training images = %d' % dataset_size)
+
+model = create_model(opt)
+visualizer = Visualizer(opt)
+
+total_steps = (start_epoch-1) * dataset_size + epoch_iter
+for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1):
+    epoch_start_time = time.time()
+    if epoch != start_epoch:
+        epoch_iter = epoch_iter % dataset_size
+    for i, data in enumerate(dataset, start=epoch_iter):
+        iter_start_time = time.time()
+        total_steps += opt.batchSize
+        epoch_iter += opt.batchSize
+
+        # whether to collect output images
+        save_fake = total_steps % opt.display_freq == 0
+
+        ############## Forward Pass ######################
+        losses, generated = model(Variable(data['label']), Variable(data['inst']),
+            Variable(data['image']), Variable(data['feat']), infer=save_fake)
+
+        # sum per device losses
+        losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ]
+        loss_dict = dict(zip(model.module.loss_names, losses))
+
+        # calculate final loss scalar
+        loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5
+        loss_G = loss_dict['G_GAN'] + loss_dict['G_GAN_Feat'] + loss_dict['G_VGG']
+
+        ############### Backward Pass ####################
+        # update generator weights
+        model.module.optimizer_G.zero_grad()
+        loss_G.backward()
+        model.module.optimizer_G.step()
+
+        # update discriminator weights
+        model.module.optimizer_D.zero_grad()
+        loss_D.backward()
+        model.module.optimizer_D.step()
+
+        #call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"])
+
+        ############## Display results and errors ##########
+        ### print out errors
+        if total_steps % opt.print_freq == 0:
+            errors = {k: v.data[0] if not isinstance(v, (int,long,float)) else v for k, v in loss_dict.items()}
+            t = (time.time() - iter_start_time) / opt.batchSize
+            visualizer.print_current_errors(epoch, epoch_iter, errors, t)
+            visualizer.plot_current_errors(errors, total_steps)
+
+        ### display output images
+        if save_fake:
+            visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)),
+                                   ('synthesized_image', util.tensor2im(generated.data[0])),
+                                   ('real_image', util.tensor2im(data['image'][0]))])
+            visualizer.display_current_results(visuals, epoch, total_steps)
+
+        ### save latest model
+        if total_steps % opt.save_latest_freq == 0:
+            print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps))
+            model.module.save('latest')
+            np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d')
+
+    # end of epoch
+    iter_end_time = time.time()
+    print('End of epoch %d / %d \t Time Taken: %d sec' %
+          (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))
+
+    ### save model for this epoch
+    if epoch % opt.save_epoch_freq == 0:
+        print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps))
+        model.module.save('latest')
+        model.module.save(epoch)
+        np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d')
+
+    ### instead of only training the local enhancer, train the entire network after certain iterations
+    if (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global):
+        model.module.update_fixed_params()
+
+    ### linearly decay learning rate after certain iterations
+    if epoch > opt.niter:
+        model.module.update_learning_rate()
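Two patterns in this file can be illustrated in isolation. First, the iter.txt resume bookkeeping: the loop periodically writes (epoch, epoch_iter) with np.savetxt, and --continue_train reads them back with np.loadtxt, falling back to a fresh start if the file is missing or unreadable. A minimal sketch of that round trip, assuming only numpy and a hypothetical checkpoint path:

    import os
    import numpy as np

    iter_path = os.path.join('checkpoints', 'label2city', 'iter.txt')  # hypothetical path
    os.makedirs(os.path.dirname(iter_path), exist_ok=True)

    # Save: record the epoch/iteration to resume from, one integer per line.
    np.savetxt(iter_path, (3, 1200), delimiter=',', fmt='%d')

    # Load: restore them, defaulting to a fresh run if the file cannot be read.
    try:
        start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int)
    except OSError:
        start_epoch, epoch_iter = 1, 0
    print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter))

Second, the backward pass alternates a generator step and a discriminator step on each batch, with loss_D averaging the real and fake terms. A minimal modern-PyTorch sketch of that alternation, using stand-in linear networks and BCE losses rather than the repo's multi-scale discriminator and GAN/feature-matching/VGG losses:

    import torch
    import torch.nn as nn

    netG, netD = nn.Linear(8, 8), nn.Linear(8, 1)  # stand-ins for the real networks
    optimizer_G = torch.optim.Adam(netG.parameters(), lr=2e-4)
    optimizer_D = torch.optim.Adam(netD.parameters(), lr=2e-4)
    criterion = nn.BCEWithLogitsLoss()

    real = torch.randn(4, 8)
    fake = netG(torch.randn(4, 8))

    # Generator step: push D's score on fakes toward "real".
    loss_G = criterion(netD(fake), torch.ones(4, 1))
    optimizer_G.zero_grad()
    loss_G.backward()
    optimizer_G.step()

    # Discriminator step: 0.5 * (real term + fake term), mirroring loss_D above.
    # detach() keeps this backward pass out of the generator's graph, and
    # optimizer_D.zero_grad() discards the D-gradients left over from loss_G.
    loss_D = 0.5 * (criterion(netD(real), torch.ones(4, 1)) +
                    criterion(netD(fake.detach()), torch.zeros(4, 1)))
    optimizer_D.zero_grad()
    loss_D.backward()
    optimizer_D.step()

Note the train.py above predates this style: it uses Python 2 (`long`) and pre-0.4 PyTorch idioms (Variable, `.data[0]`), so the sketch shows the pattern rather than the exact API.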
