Diffstat (limited to 'cli/app/search/search_class.py')
-rw-r--r--  cli/app/search/search_class.py  93
1 file changed, 83 insertions, 10 deletions
diff --git a/cli/app/search/search_class.py b/cli/app/search/search_class.py
index 7eab4bc..42ff605 100644
--- a/cli/app/search/search_class.py
+++ b/cli/app/search/search_class.py
@@ -24,7 +24,29 @@ from app.search.image import image_to_uint8, imconvert_uint8, imconvert_float32,
from app.search.vector import truncated_z_sample, truncated_z_single, \
create_labels, create_labels_uniform
-def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit=-1):
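+# Short layer keys mapped to intermediate endpoints of the TF-Hub InceptionV3
+# feature-vector module; used below to build the feature-space loss.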
+feature_layer_names = {
+ '1a': "InceptionV3/Conv2d_1a_3x3",
+ '2a': "InceptionV3/Conv2d_2a_3x3",
+ '2b': "InceptionV3/Conv2d_2b_3x3",
+ '3a': "InceptionV3/Conv2d_3a_3x3",
+ '3b': "InceptionV3/Conv2d_3b_3x3",
+ '4a': "InceptionV3/Conv2d_4a_3x3",
+ '5b': "InceptionV3/Mixed_5b",
+ '5c': "InceptionV3/Mixed_5c",
+ '5d': "InceptionV3/Mixed_5d",
+ '6a': "InceptionV3/Mixed_6a",
+ '6b': "InceptionV3/Mixed_6b",
+ '6c': "InceptionV3/Mixed_6c",
+ '6d': "InceptionV3/Mixed_6d",
+ '6e': "InceptionV3/Mixed_6e",
+ '7a': "InceptionV3/Mixed_7a",
+ '7b': "InceptionV3/Mixed_7b",
+ '7c': "InceptionV3/Mixed_7c",
+}
+
+def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag,
+ opt_limit=-1, opt_stochastic_clipping=0, opt_label_clipping=0,
+ opt_use_feature_detector=False, opt_feature_layers=('1a', '2a', '4a', '7a'), opt_snapshot_interval=20):
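+ # The three interval options fire every N optimization steps; 0 disables them.
+ # opt_feature_layers entries must be keys of feature_layer_names above.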
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()
generator = hub.Module('https://tfhub.dev/deepmind/biggan-512/2')
@@ -42,12 +64,14 @@ def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_ta
if index == opt_limit:
break
out_fns[index] = os.path.basename(path)
- fp_frames = find_nearest_vector(sess, generator, path, opt_dims, out_images, out_labels, out_latent, opt_steps, index)
+ fp_frames = find_nearest_vector(sess, generator, path, opt_dims, out_images, out_labels, out_latent, opt_steps, index,
+ opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval)
if opt_video:
export_video(fp_frames)
sess.close()
-def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_labels, out_latent, opt_steps, index):
+def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_labels, out_latent, opt_steps, index,
+ opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval):
"""
Find the closest latent and class vectors for an image. Store the class vector in an HDF5.
"""
@@ -74,13 +98,58 @@ def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_la
'truncation': input_trunc,
})
+ ## clip the Z encoding
+
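+ # Stochastic clipping: resample any z component that leaves [-clip, clip]
+ # uniformly within that range instead of hard-clipping it to the boundary
+ # (cf. Lipton & Tripathi, 2017).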
+ clipped_encoding = tf.where(tf.abs(input_z) >= params.clip,
+ tf.random.uniform(tf.shape(input_z), minval=-params.clip, maxval=params.clip), input_z)
+ clip_latent = tf.assign(input_z, clipped_encoding)
+
target = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, img_size, img_size, num_channels))
- # loss = tf.losses.compute_weighted_loss(tf.square(output - target), weights=mask)
- loss = tf.compat.v1.losses.mean_squared_error(target, output)
+ ## normalize the Y encoding
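+ # Re-projecting y onto the unit L2 sphere every opt_label_clipping steps keeps
+ # the class vector at constant scale during optimization.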
+ normalized_labels = tf.nn.l2_normalize(input_y, axis=-1)
+ clip_labels = tf.assign(input_y, normalized_labels)
+
+ ## if computing Feature loss, use these encoders
+ if opt_use_feature_detector:
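+ # Feature ("perceptual") loss: compare generated and target images both in
+ # pixel space and in the activation space of an ImageNet-trained InceptionV3.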
+ pix_square_diff = tf.square((target - output) / 2.0)
+ mse_loss = tf.reduce_mean(pix_square_diff)
+
+ feature_extractor = hub.Module("https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1")
+
+ # Rescale images from [-1, 1] to [0, 1]; both tensors are already channels_last.
+ gen_img_1 = output / 2.0 + 0.5
+ target_img_1 = target / 2.0 + 0.5
+
+ # Convert images to appropriate size for feature extraction.
+ height, width = hub.get_expected_image_size(feature_extractor)
+ gen_img_1 = tf.image.resize_images(gen_img_1, [height, width])
+ target_img_1 = tf.image.resize_images(target_img_1, [height, width])
+
+ gen_feat_ex = feature_extractor(dict(images=gen_img_1), as_dict=True, signature='image_feature_vector')
+ target_feat_ex = feature_extractor(dict(images=target_img_1), as_dict=True, signature='image_feature_vector')
- train_step_z = tf.train.AdamOptimizer(z_lr).minimize(loss, var_list=[input_z], name='AdamOpterZ')
- train_step_y = tf.train.AdamOptimizer(y_lr).minimize(loss, var_list=[input_y], name='AdamOpterY')
+ feat_loss = tf.constant(0.0)
+
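+ # Accumulate a per-layer MSE, weighting each selected endpoint equally.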
+ for layer in opt_feature_layers:
+ layer_name = feature_layer_names[layer]
+ gen_feat = gen_feat_ex[layer_name]
+ target_feat = target_feat_ex[layer_name]
+ feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [batch_size, -1])
+ feat_loss += tf.reduce_mean(feat_square_diff) / len(opt_feature_layers)
+
+ # Batch reconstruction error: pixel MSE plus feature loss, equally weighted.
+ # Named 'loss' so the training loop below can fetch it in either branch.
+ loss = 1.0 * mse_loss + 1.0 * feat_loss
+
+ train_step_z = tf.train.AdamOptimizer(learning_rate=z_lr, beta1=0.9, beta2=0.999).minimize(loss, var_list=[input_z])
+ train_step_y = tf.train.AdamOptimizer(learning_rate=y_lr, beta1=0.9, beta2=0.999).minimize(loss, var_list=[input_y])
+
+ else:
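+ # Fallback: plain pixel-space MSE when the feature detector is disabled.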
+ loss = tf.compat.v1.losses.mean_squared_error(target, output)
+ train_step_z = tf.train.AdamOptimizer(z_lr).minimize(loss, var_list=[input_z], name='AdamOpterZ')
+ train_step_y = tf.train.AdamOptimizer(y_lr).minimize(loss, var_list=[input_y], name='AdamOpterY')
target_im, fp_frames = load_target_image(opt_fp_in)
@@ -106,12 +175,16 @@ def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_la
print("Preparing to iterate...")
for i in range(opt_steps):
curr_loss, _, _ = sess.run([loss, train_step_z, train_step_y], feed_dict=feed_dict)
-
if i % 20 == 0:
+ print('iter: {}, loss: {}'.format(i, curr_loss))
+ if opt_stochastic_clipping != 0 and i % opt_stochastic_clipping == 0:
+ sess.run(clip_latent)
+ if opt_label_clipping != 0 and i % opt_label_clipping == 0:
+ sess.run(clip_labels)
+ if opt_snapshot_interval != 0 and i % opt_snapshot_interval == 0:
phi_guess = sess.run(output)
guess_im = imgrid(imconvert_uint8(phi_guess), cols=1)
- imwrite(join(app_cfg.DIR_OUTPUTS, fp_frames, 'frame_{:04d}.png'.format(int(i / 20))), guess_im)
- print('iter: {}, loss: {}'.format(i, curr_loss))
+ imwrite(join(app_cfg.DIR_OUTPUTS, fp_frames, 'frame_{:04d}.png'.format(int(i / opt_snapshot_interval))), guess_im)
except KeyboardInterrupt:
pass