-rw-r--r--  cli/app/search/json.py            3
-rw-r--r--  cli/app/search/search_dense.py  172
2 files changed, 66 insertions(+), 109 deletions(-)
diff --git a/cli/app/search/json.py b/cli/app/search/json.py
index e889587..fbdfe84 100644
--- a/cli/app/search/json.py
+++ b/cli/app/search/json.py
@@ -68,13 +68,14 @@ def make_params_dense(tag, folder_id):
"dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.latent.hdf5"),
"inv_layer": "Generator_2/G_Z/Reshape:0",
"decay_lr": True,
- "inv_it": 15000,
+ "inv_it": 10000,
"generator_path": "https://tfhub.dev/deepmind/biggan-512/2",
"attention_map_layer": "Generator_2/attention/Softmax:0",
"pre_trained_latent": True,
"likeli_loss": False,
"init_hi": 0.001,
"lr": 0.01,
+ "lr_quad": 0.001,
"norm_loss": False,
"generator_fixed_inputs": {
"truncation": 1.0
diff --git a/cli/app/search/search_dense.py b/cli/app/search/search_dense.py
index 823b91e..645e4bb 100644
--- a/cli/app/search/search_dense.py
+++ b/cli/app/search/search_dense.py
@@ -171,11 +171,9 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
if params.mse:
pix_square_diff = tf.square((target_img - gen_img) / 2.0)
mse_loss = tf.reduce_mean(pix_square_diff)
- ssim_loss = 1 - tf.image.ssim(im1, im2, max_val=1.0)
img_mse_err = tf.reduce_mean(pix_square_diff, axis=[1,2,3])
else:
mse_loss = tf.constant(0.0)
- ssim_loss = tf.constant(0.0)
img_mse_err = tf.constant(0.0)
# Use custom features for image comparison.
@@ -183,118 +181,32 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
feature_extractor = hub.Module(str(params.feature_extractor_path))
# Convert images from range [-1, 1] channels_first to [0, 1] channels_last.
- gen_img_1 = tf.transpose(gen_img / 2.0 + 0.5, [0, 2, 3, 1])
- target_img_1 = tf.transpose(target_img / 2.0 + 0.5, [0, 2, 3, 1])
+ gen_img_ch = tf.transpose(gen_img / 2.0 + 0.5, [0, 2, 3, 1])
+ target_img_ch = tf.transpose(target_img / 2.0 + 0.5, [0, 2, 3, 1])
# Convert images to appropriate size for feature extraction.
height, width = hub.get_expected_image_size(feature_extractor)
- gen_img_1 = tf.image.resize_images(gen_img_1, [height, width])
- target_img_1 = tf.image.resize_images(target_img_1, [height, width])
+ img_w = IMG_SHAPE[0]
- gen_feat_ex = feature_extractor(dict(images=gen_img_1), as_dict=True, signature='image_feature_vector')
- target_feat_ex = feature_extractor(dict(images=target_img_1), as_dict=True, signature='image_feature_vector')
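+ # Full-image pass: y=None makes feature_loss resize both images to the extractor input size.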
+ feat_loss, img_feat_err = feature_loss(gen_img_ch, target_img_ch, None, None, height, width, feature_extractor, opt_feature_layers)
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7a"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7a"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss = tf.reduce_mean(feat_square_diff) * 0.334
- # img_feat_err = tf.reduce_mean(feat_square_diff, axis=1) * 0.334
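+ # Quadrant passes: the same feature loss on the four corner crops of the (img_w x img_w) images.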
+ feat_loss_a, feat_err_a = feature_loss(gen_img_ch, target_img_ch, 0, 0, height, width, feature_extractor, opt_feature_layers)
+ feat_loss_b, feat_err_b = feature_loss(gen_img_ch, target_img_ch, img_w - width, 0, height, width, feature_extractor, opt_feature_layers)
+ feat_loss_c, feat_err_c = feature_loss(gen_img_ch, target_img_ch, 0, img_w - width, height, width, feature_extractor, opt_feature_layers)
+ feat_loss_d, feat_err_d = feature_loss(gen_img_ch, target_img_ch, img_w - width, img_w - width, height, width, feature_extractor, opt_feature_layers)
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7b"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7b"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.333
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.333
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7c"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7c"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.333
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.333
-
- # # gen_feat = gen_feat_ex["InceptionV3/Mixed_5a"]
- # # target_feat = target_feat_ex["InceptionV3/Mixed_5a"]
- # # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # # feat_loss += tf.reduce_mean(feat_square_diff) * 0.16
- # # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.16
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7b"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7b"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.33
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1)
-
- # # gen_feat = gen_feat_ex["InceptionV3/Mixed_7c"]
- # # target_feat = target_feat_ex["InceptionV3/Mixed_7c"]
- # # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # # feat_loss += tf.reduce_mean(feat_square_diff) * 0.17
- # # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.17
-
- feat_loss = tf.constant(0.0)
- img_feat_err = tf.constant(0.0)
-
- if type(opt_feature_layers) == str:
- opt_feature_layers = opt_feature_layers.split(',')
- fixed_layers = []
- for layer in opt_feature_layers:
- if ',' in layer:
- fixed_layers += layer.split(',')
- else:
- fixed_layers.append(layer)
-
- for layer in fixed_layers:
- if layer in feature_layer_names:
- layer_name = feature_layer_names[layer]
- gen_feat = gen_feat_ex[layer_name]
- target_feat = target_feat_ex[layer_name]
- feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- feat_loss += tf.reduce_mean(feat_square_diff) / len(opt_feature_layers)
- img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) / len(opt_feature_layers)
-
- # conv1 1, conv1 2, conv3 2 and conv4 2
- # gen_feat = gen_feat_ex["InceptionV3/Conv2d_1a_3x3"]
- # target_feat = target_feat_ex["InceptionV3/Conv2d_1a_3x3"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss = tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err = tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Conv2d_2a_3x3"]
- # target_feat = target_feat_ex["InceptionV3/Conv2d_2a_3x3"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Conv2d_3b_1x1"]
- # target_feat = target_feat_ex["InceptionV3/Conv2d_3b_1x1"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_6a"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_6a"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7a"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7a"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
+ feat_loss_quad = feat_loss_a + feat_loss_b + feat_loss_c + feat_loss_d
+ img_feat_err_quad = feat_err_a + feat_err_b + feat_err_c + feat_err_d
else:
feat_loss = tf.constant(0.0)
img_feat_err = tf.constant(0.0)
+ feat_loss_quad = tf.constant(0.0)
+ img_feat_err_quad = tf.constant(0.0)
- # Per image reconstruction error.
- img_rec_err = params.lambda_mse * img_mse_err\
- + params.lambda_feat * img_feat_err
-
- # Batch reconstruction error.
- rec_loss = params.lambda_mse * ssim_loss * params.lambda_mse * mse_loss + params.lambda_feat * feat_loss
-
- # Total inversion loss.
- inv_loss = rec_loss
+ img_rec_err = params.lambda_mse * img_mse_err + params.lambda_feat * img_feat_err
+ inv_loss = params.lambda_mse * mse_loss + params.lambda_feat * feat_loss
+ inv_loss_quad = params.lambda_mse * mse_loss + params.lambda_feat * feat_loss_quad
# --------------------------
# Optimizer.
@@ -304,13 +216,19 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
params.inv_it / params.decay_n, 0.1, staircase=True)
else:
lrate = tf.constant(params.lr)
+
# trained_params = [label, latent, encoding]
trained_params = [latent, encoding]
+
optimizer = tf.train.AdamOptimizer(learning_rate=lrate, beta1=0.9, beta2=0.999)
inv_train_op = optimizer.minimize(inv_loss, var_list=trained_params,
global_step=inv_step)
reinit_optimizer = tf.variables_initializer(optimizer.variables())
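+ # Separate Adam instance for the quadrant-loss phase, stepped at the fixed lr_quad rate.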
+ optimizer_quad = tf.train.AdamOptimizer(learning_rate=params.lr_quad, beta1=0.9, beta2=0.999)
+ inv_train_op_quad = optimizer_quad.minimize(inv_loss_quad, var_list=trained_params, global_step=inv_step)
+ reinit_optimizer_quad = tf.variables_initializer(optimizer_quad.variables())
+
# --------------------------
# Noise source.
# --------------------------
@@ -394,15 +312,20 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
sess.run([
encoding.assign(gen_encoding),
reinit_optimizer,
+ reinit_optimizer_quad,
])
# Main optimization loop.
print("Beginning dense iteration...")
for _ in range(params.inv_it):
- _inv_loss, _mse_loss, _feat_loss,\
- _lrate, _ssim_loss, _ = sess.run([inv_loss, mse_loss, feat_loss,
- lrate, ssim_loss, inv_train_op])
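+ # Phase switch: full-image loss for the first 75% of iterations, then the quadrant loss at lr_quad.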
+ if it < params.inv_it * 0.75:
+ _inv_loss, _mse_loss, _feat_loss,\
+ _lrate, _ = sess.run([inv_loss, mse_loss, feat_loss,
+ lrate, inv_train_op])
+ else:
+ _inv_loss, _mse_loss, _feat_loss, _ = sess.run([inv_loss_quad, mse_loss, feat_loss_quad, inv_train_op_quad])
+ _lrate = params.lr_quad
if params.clipping or params.stochastic_clipping:
sess.run(clip_latent)
@@ -412,9 +335,9 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
# Log losses.
etime = time.time() - start_time
print('It [{:8d}] time [{:5.1f}] total [{:.4f}] mse [{:.4f}] '
- 'feat [{:.4f}] ssim [{:.4f}] '
+ 'feat [{:.4f}] '
'lr [{:.4f}]'.format(it, etime, _inv_loss, _mse_loss,
- _feat_loss, _ssim_loss, _lrate))
+ _feat_loss, _lrate))
sys.stdout.flush()
@@ -476,3 +399,36 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
print('End of inversion.')
out_file.close()
sess.close()
+
+def feature_loss(img_a, img_b, y, x, height, width, feature_extractor, opt_feature_layers):
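+ # With an explicit (y, x) offset, compare features on a crop; with y=None, compare on the resized full image.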
+ if y is not None:
+ img_a = tf.image.crop_to_bounding_box(img_a, y, x, height, width)
+ img_b = tf.image.crop_to_bounding_box(img_b, y, x, height, width)
+ else:
+ img_a = tf.image.resize_images(img_a, [height, width])
+ img_b = tf.image.resize_images(img_b, [height, width])
+
+ gen_feat_ex = feature_extractor(dict(images=img_a), as_dict=True, signature='image_feature_vector')
+ target_feat_ex = feature_extractor(dict(images=img_b), as_dict=True, signature='image_feature_vector')
+
+ feat_loss = tf.constant(0.0)
+ img_feat_err = tf.constant(0.0)
+
+ if isinstance(opt_feature_layers, str):
+ opt_feature_layers = opt_feature_layers.split(',')
+ fixed_layers = []
+ for layer in opt_feature_layers:
+ if ',' in layer:
+ fixed_layers += layer.split(',')
+ else:
+ fixed_layers.append(layer)
+
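+ # Accumulate mean squared feature differences over the selected extractor layers.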
+ for layer in fixed_layers:
+ if layer in feature_layer_names:
+ layer_name = feature_layer_names[layer]
+ gen_feat = gen_feat_ex[layer_name]
+ target_feat = target_feat_ex[layer_name]
+ feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
+ feat_loss += tf.reduce_mean(feat_square_diff) / len(fixed_layers)
+ img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) / len(fixed_layers)
+ return feat_loss, img_feat_err
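For orientation, a rough sketch (not part of the patch) of how the new pieces compose: the quadrant loss sums feature_loss over the four corner crops, and the optimization loop runs the full-image objective for the first 75% of inv_it iterations before switching to the quadrant objective at the fixed lr_quad rate. quadrant_feature_loss below is a hypothetical helper written against the feature_loss signature introduced above, assuming square images of side img_w.

    # Hypothetical helper, assuming square (img_w x img_w) images and the
    # feature_loss(img_a, img_b, y, x, height, width, extractor, layers)
    # signature added in this patch.
    def quadrant_feature_loss(gen, tgt, img_w, height, width, extractor, layers):
        # Corner offsets: top-left, bottom-left, top-right, bottom-right.
        offsets = [(0, 0), (img_w - height, 0),
                   (0, img_w - width), (img_w - height, img_w - width)]
        total_loss, total_err = 0.0, 0.0
        for y, x in offsets:
            loss, err = feature_loss(gen, tgt, y, x, height, width, extractor, layers)
            total_loss += loss
            total_err += err
        return total_loss, total_err

    # Phase schedule used in the optimization loop: 'full' runs inv_train_op
    # at the (possibly decayed) lr; 'quad' runs inv_train_op_quad at lr_quad.
    def phase(it, inv_it):
        return 'full' if it < inv_it * 0.75 else 'quad'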