### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
import os
import time
from collections import OrderedDict

import numpy as np
import torch
from torch.autograd import Variable

from options.train_options import TrainOptions
from data.data_loader import CreateDataLoader
from models.models import create_model
import util.util as util
from util.visualizer import Visualizer

opt = TrainOptions().parse()

# iter.txt records (next epoch, iteration) so an interrupted run can be
# resumed with --continue_train.
iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
if opt.continue_train:
    try:
        start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int)
    except Exception:
        start_epoch, epoch_iter = 1, 0
    print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter))
else:
    start_epoch, epoch_iter = 1, 0
    print('Starting a new training run from epoch 1')

# In debug mode, shrink everything so a full pass through the script is fast.
if opt.debug:
    opt.display_freq = 1
    opt.print_freq = 1
    opt.niter = 1
    opt.niter_decay = 0
    opt.max_dataset_size = 10

data_loader = CreateDataLoader(opt)
dataset = data_loader.load_data()
dataset_size = len(data_loader)
print('dataset size = %d' % dataset_size)

model = create_model(opt)
visualizer = Visualizer(opt)

# Offsets so that display/print/save fire on the same schedule after a
# resume as they would have in an uninterrupted run.
total_steps = (start_epoch - 1) * dataset_size + epoch_iter
display_delta = total_steps % opt.display_freq
print_delta = total_steps % opt.print_freq
save_delta = total_steps % opt.save_latest_freq

print('training epochs %d to %d' % (start_epoch, start_epoch + opt.niter - 1))
for epoch in range(start_epoch, start_epoch + opt.niter):
    epoch_start_time = time.time()
    # Mid-epoch resume is disabled in this variant; every epoch restarts at 0.
    # if epoch != start_epoch:
    #     epoch_iter = epoch_iter % dataset_size
    epoch_iter = 0
    np.savetxt(iter_path, (int(epoch) + 1, epoch_iter), delimiter=',', fmt='%d')
    for i, data in enumerate(dataset):  # , start=epoch_iter
        iter_start_time = time.time()
        total_steps += opt.batchSize
        epoch_iter += opt.batchSize

        # whether to collect output images
        save_fake = total_steps % opt.display_freq == display_delta

        ############## Forward Pass ######################
        losses, generated = model(Variable(data['label']), Variable(data['inst']),
                                  Variable(data['image']), Variable(data['feat']),
                                  infer=save_fake)

        # sum per-device losses (the model is wrapped in DataParallel,
        # hence model.module below)
        losses = [torch.mean(x) if not isinstance(x, int) else x for x in losses]
        loss_dict = dict(zip(model.module.loss_names, losses))

        # calculate final loss scalars
        loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5
        loss_G = loss_dict['G_GAN'] + loss_dict.get('G_GAN_Feat', 0) + loss_dict.get('G_VGG', 0)

        ############### Backward Pass ####################
        # update generator weights
        model.module.optimizer_G.zero_grad()
        loss_G.backward()
        model.module.optimizer_G.step()

        # update discriminator weights
        model.module.optimizer_D.zero_grad()
        loss_D.backward()
        model.module.optimizer_D.step()

        # call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"])

        ############## Display results and errors ##########
        ### print out errors
        if total_steps % opt.print_freq == print_delta:
            # .item() handles the 0-dim tensors returned on modern PyTorch
            errors = {k: v.data.item() if not isinstance(v, int) else v
                      for k, v in loss_dict.items()}
            t = (time.time() - iter_start_time) / opt.batchSize
            visualizer.print_current_errors(epoch, epoch_iter, errors, t)
            visualizer.plot_current_errors(errors, total_steps)

        ### display output images
        # if save_fake:
        #     visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)),
        #                            ('synthesized_image', util.tensor2im(generated.data[0])),
        #                            ('real_image', util.tensor2im(data['image'][0]))])
        #     visualizer.display_current_results(visuals, epoch, total_steps)
        ### save latest model
        if total_steps % opt.save_latest_freq == save_delta:
            print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps))
            model.module.save('latest')
            model.module.save(str(epoch))
            # record the next epoch to resume from (mid-epoch resume is disabled above)
            np.savetxt(iter_path, (int(epoch) + 1, epoch_iter), delimiter=',', fmt='%d')

    # end of epoch
    iter_end_time = time.time()
    print('End of epoch %d \t Time Taken: %d sec' % (epoch, time.time() - epoch_start_time))

    ### save model at the end of every epoch
    print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps))
    model.module.save(str(epoch))
    np.savetxt(iter_path, (int(epoch) + 1, 0), delimiter=',', fmt='%d')
    model.module.save('latest')

    ### instead of only training the local enhancer, train the entire network after certain iterations
    # if (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global):
    #     model.module.update_fixed_params()

    ### linearly decay learning rate after certain iterations
    # if opt.niter != 0 and epoch > opt.niter:
    #     model.module.update_learning_rate()
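
# ------------------------------------------------------------------
# Usage sketch (assumptions flagged): the flags below are inferred from
# the opt.* attributes this script reads (name, checkpoints_dir,
# continue_train, debug, batchSize); check options/train_options.py for
# the authoritative flag list and defaults before relying on them.
#
#   # fresh run
#   python train.py --name my_experiment --batchSize 1
#
#   # quick smoke test: tiny dataset, log and display every step
#   python train.py --name my_experiment --debug
#
#   # resume an interrupted run; start_epoch/epoch_iter are reloaded
#   # from <checkpoints_dir>/<name>/iter.txt by the logic at the top
#   python train.py --name my_experiment --continue_train
# ------------------------------------------------------------------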