Modified README

author: cam <cameron@ideum.com> 2016-10-10 01:54:16 -0600
committer: cam <cameron@ideum.com> 2016-10-10 01:54:16 -0600
commit: 43ebfac2db256c294e6dfef8c5eb2037df8350c1 (patch)
tree: 27340b49dccc15998f681d39b5f83e0724a590f4 /neural_style.py
parent: 86ad6bd064e9bac38018a77a059a623176a3247a (diff)
1 files changed, 74 insertions, 46 deletions
diff --git a/neural_style.py b/neural_style.py
index 0db8403..565dbc0 100644
--- a/neural_style.py
+++ b/neural_style.py
@@ -1,5 +1,3 @@
-import matplotlib.pyplot as plt           
-import tensorflow.python
 import tensorflow as tf
 import numpy as np 
 import scipy.io  
@@ -20,11 +18,11 @@ def parse_args():
 
   # options for single image
   parser.add_argument('--verbose', action='store_true',
-    help="Boolean flag indicating if statements should be printed to the console.")
+    help='Boolean flag indicating if statements should be printed to the console.')
 
   parser.add_argument('--img_name', type=str, 
-    default="result",
-    help="Filename of the output image.")
+    default='result',
+    help='Filename of the output image.')
 
   parser.add_argument('--style_imgs', nargs='+', type=str,
     help='Filenames of the style images (example: starry-night.jpg)', 
@@ -73,7 +71,7 @@ def parse_args():
   parser.add_argument('--content_loss_function', type=int,
     default=1,
     choices=[1, 2, 3],
-    help='A few different constants for the content layer loss functions have been presented. (default: %(default)s)')
+    help='Different constants for the content layer loss functions. (default: %(default)s)')
   
   parser.add_argument('--content_layers', type=str, 
     default=['conv4_2'],
@@ -112,12 +110,13 @@ def parse_args():
     help='Seed for the random number generator. (default: %(default)s)')
   
   parser.add_argument('--model_weights', type=str, 
-    default='imagenet-vgg-verydeep-19.mat')
+    default='imagenet-vgg-verydeep-19.mat',
+    help='Weights and biases of the VGG-19 network.')
   
   parser.add_argument('--pooling_type', type=str,
     default='avg',
     choices=['avg', 'max'],
-    help="Type of pooling in convolutional neural network. (default: %(default)s)")
+    help='Type of pooling in convolutional neural network. (default: %(default)s)')
   
   parser.add_argument('--device', type=str, 
     default='/gpu:0',
@@ -139,17 +138,23 @@ def parse_args():
     help='Learning rate parameter for the Adam optimizer. (default: %(default)s)')
   
   parser.add_argument('--max_iterations', type=int, 
-    default=1e3,
+    default=1000,
     help='Max number of iterations for the Adam or L-BFGS optimizer. (default: %(default)s)')
+
+  parser.add_argument('--print_iterations', type=int, 
+    default=50,
+    help='Number of iterations between optimizer print statements. (default: %(default)s)')
   
   # options for video frames
   parser.add_argument('--video', action='store_true', 
     help='Boolean flag indicating if the user is generating a video.')
 
-  parser.add_argument('--start_frame', type=int, default=1,
+  parser.add_argument('--start_frame', type=int, 
+  	default=1,
     help='First frame number.')
   
-  parser.add_argument('--end_frame', type=int, default=1,
+  parser.add_argument('--end_frame', type=int, 
+  	default=1,
     help='Last frame number.')
   
   parser.add_argument('--first_frame_type', type=str,
@@ -189,9 +194,22 @@ def parse_args():
   parser.add_argument('--prev_frame_indices', nargs='+', type=int, 
     default=[1],
     help='Previous frames to consider for longterm temporal consistency.')
+
+  parser.add_argument('--first_frame_iterations', type=int, 
+    default=2000,
+    help='Maximum number of optimizer iterations of the first frame. (default: %(default)s)')
   
+  parser.add_argument('--frame_iterations', type=int, 
+    default=800,
+    help='Maximum number of optimizer iterations for each frame after the first frame. (default: %(default)s)')
+
   args = parser.parse_args()
 
+  # normalize weights
+  args.style_layer_weights   = norm(args.style_layer_weights)
+  args.content_layer_weights = norm(args.content_layer_weights)
+  args.style_imgs_weights    = norm(args.style_imgs_weights)
+
   # create directories for output
   if args.video:
     maybe_make_directory(args.video_output_dir)
@@ -208,7 +226,7 @@ def parse_args():
 vgg19_mean = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))
 
 def build_vgg19(input_img):
-  if args.verbose: print("\nBUILDING VGG-19 NETWORK")
+  if args.verbose: print('\nBUILDING VGG-19 NETWORK')
   net = {}
   _, h, w, d     = input_img.shape
   
@@ -324,9 +342,13 @@ def content_layer_loss(p, x):
   _, h, w, d = p.get_shape()
   M = h.value * w.value
   N = d.value
-  loss = (1./(2 * N**0.5 * M**0.5 )) * tf.reduce_sum(tf.pow((x - p), 2))
-  #loss = (1./2.) * tf.reduce_sum(tf.pow((x - p), 2))
-  #loss = (1./(N * M)) * tf.reduce_sum(tf.pow((x - p), 2)) 
+  if args.content_loss_function   == 1:
+  	K = 1. / (2 * N**0.5 * M**0.5)
+  elif args.content_loss_function == 2:
+  	K = 1. / 2.
+  elif args.content_loss_function == 3:  
+  	K = 1. / (N * M)
+  loss = K * tf.reduce_sum(tf.pow((x - p), 2))
   return loss
 
 def gram_matrix(x, area, depth):
@@ -398,7 +420,6 @@ def sum_content_losses(sess, net, content_img):
     p = sess.run(net[layer])
     x = net[layer]
     p = tf.convert_to_tensor(p)
-    x = tf.convert_to_tensor(x)
     content_loss += content_layer_loss(p, x) * weight
   content_loss /= float(len(args.content_layers))
   return content_loss
@@ -471,7 +492,7 @@ def write_image(path, img):
 def preprocess(img, mean):
   # BGR to RGB
   img = img[...,::-1]
-  # shape (H, W, D) to (1, H, W, D)
+  # shape (h, w, d) to (1, h, w, d)
   img = img[np.newaxis,:,:,:]
   # subtract mean
   img -= mean
@@ -480,7 +501,7 @@ def preprocess(img, mean):
 def postprocess(img, mean):
   # add mean
   img += mean
-  # shape (1, H, W, D) to (H, W, D)
+  # shape (1, h, w, d) to (h, w, d)
   img = img[0]
   img = np.clip(img, 0, 255).astype('uint8')
   # RGB to BGR
@@ -488,7 +509,7 @@ def postprocess(img, mean):
   return img
 
 def read_flow_file(path):
-  with open(path, "rb") as f:
+  with open(path, 'rb') as f:
     # 4 bytes header
     header = struct.unpack('4s', f.read(4))[0]
     # 4 bytes width, height    
@@ -515,6 +536,9 @@ def read_weights_file(path):
   weights = np.dstack([vals.astype(np.float32)] * 3)
   return weights
 
+def norm(weights):
+  return [float(i)/sum(weights) for i in weights]
+
 def maybe_make_directory(dir_path):
   if not os.path.exists(dir_path):  
     os.makedirs(dir_path)
@@ -558,13 +582,13 @@ def stylize(content_img, style_imgs, init_img, frame=None):
     optimizer = get_optimizer(L_total)
 
     if args.optimizer == 'adam':
-      minimize_with_adam(sess, net, optimizer, init_img)
+      minimize_with_adam(sess, net, optimizer, init_img, L_total)
     elif args.optimizer == 'lbfgs':
       minimize_with_lbfgs(sess, net, optimizer, init_img)
     
     output_img = sess.run(net['input'])
     
-    if args.is_original_colors:
+    if args.original_colors:
       output_img = convert_to_original_colors(np.copy(content_img), np.copy(output_img))
 
     if args.video:
@@ -579,9 +603,9 @@ def minimize_with_lbfgs(sess, net, optimizer, init_img):
   sess.run(net['input'].assign(init_img))
   optimizer.minimize(sess)
 
-def minimize_with_adam(sess, net, optimizer, init_img):
+def minimize_with_adam(sess, net, optimizer, init_img, loss):
   if args.verbose: print('MINIMIZING LOSS USING: ADAM OPTIMIZER')
-  train_op = optimizer.minimize(L_total)
+  train_op = optimizer.minimize(loss)
   init_op = tf.initialize_all_variables()
   sess.run(init_op)
   sess.run(net['input'].assign(init_img))
@@ -591,12 +615,13 @@ def minimize_with_adam(sess, net, optimizer, init_img):
     iterations += 1
 
 def get_optimizer(loss):
+  print_iterations = args.print_iterations if args.verbose else 0
   if args.optimizer == 'lbfgs':
     optimizer = tf.contrib.opt.ScipyOptimizerInterface(
       loss, 
       method='L-BFGS-B',
       options={'maxiter': args.max_iterations,
-                  'disp': args.verbose})
+                  'disp': print_iterations})
   elif args.optimizer == 'adam':
     optimizer = tf.train.AdamOptimizer(args.learning_rate)
   return optimizer
@@ -609,40 +634,42 @@ def write_video_output(frame, output_img):
 def write_image_output(output_img, content_img, style_imgs, init_img):
   out_dir = os.path.join(args.img_output_dir, args.img_name)
   maybe_make_directory(out_dir)
-  img_path = os.path.join(out_dir, "output.png")
-  content_path = os.path.join(out_dir, "content.png")
-  init_path = os.path.join(out_dir, "init.png")
+  img_path = os.path.join(out_dir, args.img_name+'.png')
+  content_path = os.path.join(out_dir, 'content.png')
+  init_path = os.path.join(out_dir, 'init.png')
 
   write_image(img_path, output_img)
   write_image(content_path, content_img)
   write_image(init_path, init_img)
   index = 0
   for style_img in style_imgs:
-    path = os.path.join(out_dir, str(index)+"_style.png")
+    path = os.path.join(out_dir, 'style_'+str(index)+'.png')
     write_image(path, style_img)
     index += 1
-
+  
   # save the configuration settings
-  out_file = os.path.join(out_dir, "meta_data.txt")
-  f = open(out_file, "w")
-  f.write("image name: {}\n".format(args.img_name))
-  f.write("content: {}\n".format(args.content_img))
+  out_file = os.path.join(out_dir, 'meta_data.txt')
+  f = open(out_file, 'w')
+  f.write('image name: {}\n'.format(args.img_name))
+  f.write('content: {}\n'.format(args.content_img))
   index = 0
   for style_img, weight in zip(args.style_imgs, args.style_imgs_weights):
-    f.write("styles ["+str(index)+"]: {} * {}\n".format(weight, style_img))
+    f.write('styles ['+str(index)+']: {} * {}\n'.format(weight, style_img))
+    index += 1
   index = 0
   if args.style_mask_imgs is not None:
     for mask in args.style_mask_imgs:
-      f.write("style masks ["+str(index)+"]: {}\n".format(mask))
-  f.write("init_type: {}\n".format(args.init_img_type))
-  f.write("content_weight: {}\n".format(args.content_weight))
-  f.write("style_weight: {}\n".format(args.style_weight))
-  f.write("tv_weight: {}\n".format(args.tv_weight))
-  f.write("content_layers: {}\n".format(args.content_layers))
-  f.write("style_layers: {}\n".format(args.style_layers))
-  f.write("optimizer_type: {}\n".format(args.optimizer))
-  f.write("max_iterations: {}\n".format(args.max_iterations))
-  f.write("max_image_size: {}\n".format(args.max_size))
+      f.write('style masks ['+str(index)+']: {}\n'.format(mask))
+      index += 1
+  f.write('init_type: {}\n'.format(args.init_img_type))
+  f.write('content_weight: {}\n'.format(args.content_weight))
+  f.write('style_weight: {}\n'.format(args.style_weight))
+  f.write('tv_weight: {}\n'.format(args.tv_weight))
+  f.write('content_layers: {}\n'.format(args.content_layers))
+  f.write('style_layers: {}\n'.format(args.style_layers))
+  f.write('optimizer_type: {}\n'.format(args.optimizer))
+  f.write('max_iterations: {}\n'.format(args.max_iterations))
+  f.write('max_image_size: {}\n'.format(args.max_size))
   f.close()
 
 '''
@@ -752,8 +779,7 @@ def warp_image(src, flow):
   # remap pixels to optical flow
   dst = cv2.remap(
     src, flow_map[0], flow_map[1], 
-    interpolation=cv2.INTER_CUBIC, 
-    borderMode=cv2.BORDER_TRANSPARENT)
+    interpolation=cv2.INTER_CUBIC, borderMode=cv2.BORDER_TRANSPARENT)
   return dst
 
 def convert_to_original_colors(content_img, stylized_img):
@@ -787,6 +813,7 @@ def render_video():
         content_frame = get_content_frame(frame)
         style_imgs = get_style_images(content_frame, args.style_scale)
         init_img = get_init_image(args.first_frame_type, content_frame, style_imgs, frame)
+        args.max_iterations = args.first_frame_iterations
         tick = time.time()
         stylize(content_frame, style_imgs, init_img, frame)
         tock = time.time()
@@ -795,6 +822,7 @@ def render_video():
         content_frame = get_content_frame(frame)
         style_imgs = get_style_images(content_frame, args.style_scale)
         init_img = get_init_image(args.init_frame_type, content_frame, style_imgs, frame)
+        args.max_iterations = args.frame_iterations
         tick = time.time()
         stylize(content_frame, style_imgs, init_img, frame)
         tock = time.time()
author	cam <cameron@ideum.com>	2016-10-10 01:54:16 -0600
committer	cam <cameron@ideum.com>	2016-10-10 01:54:16 -0600
commit	43ebfac2db256c294e6dfef8c5eb2037df8350c1 (patch)
tree	27340b49dccc15998f681d39b5f83e0724a590f4 /neural_style.py
parent	86ad6bd064e9bac38018a77a059a623176a3247a (diff)