-rw-r--r--  cli/app/search/json.py            3
-rw-r--r--  cli/app/search/search_dense.py  172
2 files changed, 66 insertions(+), 109 deletions(-)
diff --git a/cli/app/search/json.py b/cli/app/search/json.py
index e889587..fbdfe84 100644
--- a/cli/app/search/json.py
+++ b/cli/app/search/json.py
@@ -68,13 +68,14 @@ def make_params_dense(tag, folder_id):
"dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.latent.hdf5"),
"inv_layer": "Generator_2/G_Z/Reshape:0",
"decay_lr": True,
- "inv_it": 15000,
+ "inv_it": 10000,
"generator_path": "https://tfhub.dev/deepmind/biggan-512/2",
"attention_map_layer": "Generator_2/attention/Softmax:0",
"pre_trained_latent": True,
"likeli_loss": False,
"init_hi": 0.001,
"lr": 0.01,
+ "lr_quad": 0.001,
"norm_loss": False,
"generator_fixed_inputs": {
"truncation": 1.0
diff --git a/cli/app/search/search_dense.py b/cli/app/search/search_dense.py
index 823b91e..645e4bb 100644
--- a/cli/app/search/search_dense.py
+++ b/cli/app/search/search_dense.py
@@ -171,11 +171,9 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
if params.mse:
pix_square_diff = tf.square((target_img - gen_img) / 2.0)
mse_loss = tf.reduce_mean(pix_square_diff)
- ssim_loss = 1 - tf.image.ssim(im1, im2, max_val=1.0)
img_mse_err = tf.reduce_mean(pix_square_diff, axis=[1,2,3])
else:
mse_loss = tf.constant(0.0)
- ssim_loss = tf.constant(0.0)
img_mse_err = tf.constant(0.0)
# Use custom features for image comparison.
@@ -183,118 +181,32 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
feature_extractor = hub.Module(str(params.feature_extractor_path))
# Convert images from range [-1, 1] channels_first to [0, 1] channels_last.
- gen_img_1 = tf.transpose(gen_img / 2.0 + 0.5, [0, 2, 3, 1])
- target_img_1 = tf.transpose(target_img / 2.0 + 0.5, [0, 2, 3, 1])
+ gen_img_ch = tf.transpose(gen_img / 2.0 + 0.5, [0, 2, 3, 1])
+ target_img_ch = tf.transpose(target_img / 2.0 + 0.5, [0, 2, 3, 1])
# Convert images to appropriate size for feature extraction.
height, width = hub.get_expected_image_size(feature_extractor)
- gen_img_1 = tf.image.resize_images(gen_img_1, [height, width])
- target_img_1 = tf.image.resize_images(target_img_1, [height, width])
+ img_w = IMG_SHAPE[0]
- gen_feat_ex = feature_extractor(dict(images=gen_img_1), as_dict=True, signature='image_feature_vector')
- target_feat_ex = feature_extractor(dict(images=target_img_1), as_dict=True, signature='image_feature_vector')
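+ # Full-image pass: y=None makes feature_loss resize both images to the extractor input size.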
+ feat_loss, img_feat_err = feature_loss(gen_img_ch, target_img_ch, None, None, height, width, feature_extractor, opt_feature_layers)
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7a"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7a"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss = tf.reduce_mean(feat_square_diff) * 0.334
- # img_feat_err = tf.reduce_mean(feat_square_diff, axis=1) * 0.334
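+ # Quadrant passes: the same feature loss on the four corner crops of the (img_w x img_w) images.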
+ feat_loss_a, feat_err_a = feature_loss(gen_img_ch, target_img_ch, 0, 0, height, width, feature_extractor, opt_feature_layers)
+ feat_loss_b, feat_err_b = feature_loss(gen_img_ch, target_img_ch, img_w - width, 0, height, width, feature_extractor, opt_feature_layers)
+ feat_loss_c, feat_err_c = feature_loss(gen_img_ch, target_img_ch, 0, img_w - width, height, width, feature_extractor, opt_feature_layers)
+ feat_loss_d, feat_err_d = feature_loss(gen_img_ch, target_img_ch, img_w - width, img_w - width, height, width, feature_extractor, opt_feature_layers)
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7b"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7b"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.333
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.333
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7c"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7c"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.333
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.333
-
- # # gen_feat = gen_feat_ex["InceptionV3/Mixed_5a"]
- # # target_feat = target_feat_ex["InceptionV3/Mixed_5a"]
- # # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # # feat_loss += tf.reduce_mean(feat_square_diff) * 0.16
- # # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.16
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7b"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7b"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.33
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1)
-
- # # gen_feat = gen_feat_ex["InceptionV3/Mixed_7c"]
- # # target_feat = target_feat_ex["InceptionV3/Mixed_7c"]
- # # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # # feat_loss += tf.reduce_mean(feat_square_diff) * 0.17
- # # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.17
-
- feat_loss = tf.constant(0.0)
- img_feat_err = tf.constant(0.0)
-
- if type(opt_feature_layers) == str:
- opt_feature_layers = opt_feature_layers.split(',')
- fixed_layers = []
- for layer in opt_feature_layers:
- if ',' in layer:
- fixed_layers += layer.split(',')
- else:
- fixed_layers.append(layer)
-
- for layer in fixed_layers:
- if layer in feature_layer_names:
- layer_name = feature_layer_names[layer]
- gen_feat = gen_feat_ex[layer_name]
- target_feat = target_feat_ex[layer_name]
- feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- feat_loss += tf.reduce_mean(feat_square_diff) / len(opt_feature_layers)
- img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) / len(opt_feature_layers)
-
- # conv1 1, conv1 2, conv3 2 and conv4 2
- # gen_feat = gen_feat_ex["InceptionV3/Conv2d_1a_3x3"]
- # target_feat = target_feat_ex["InceptionV3/Conv2d_1a_3x3"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss = tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err = tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Conv2d_2a_3x3"]
- # target_feat = target_feat_ex["InceptionV3/Conv2d_2a_3x3"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Conv2d_3b_1x1"]
- # target_feat = target_feat_ex["InceptionV3/Conv2d_3b_1x1"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_6a"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_6a"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
-
- # gen_feat = gen_feat_ex["InceptionV3/Mixed_7a"]
- # target_feat = target_feat_ex["InceptionV3/Mixed_7a"]
- # feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
- # feat_loss += tf.reduce_mean(feat_square_diff) * 0.25
- # img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) * 0.25
+ feat_loss_quad = feat_loss_a + feat_loss_b + feat_loss_c + feat_loss_d
+ img_feat_err_quad = feat_err_a + feat_err_b + feat_err_c + feat_err_d
else:
feat_loss = tf.constant(0.0)
img_feat_err = tf.constant(0.0)
+ feat_loss_quad = tf.constant(0.0)
+ img_feat_err_quad = tf.constant(0.0)
- # Per image reconstruction error.
- img_rec_err = params.lambda_mse * img_mse_err\
- + params.lambda_feat * img_feat_err
-
- # Batch reconstruction error.
- rec_loss = params.lambda_mse * ssim_loss * params.lambda_mse * mse_loss + params.lambda_feat * feat_loss
-
- # Total inversion loss.
- inv_loss = rec_loss
+ img_rec_err = params.lambda_mse * img_mse_err + params.lambda_feat * img_feat_err
+ inv_loss = params.lambda_mse * mse_loss + params.lambda_feat * feat_loss
+ inv_loss_quad = params.lambda_mse * mse_loss + params.lambda_feat * feat_loss_quad
# --------------------------
# Optimizer.
@@ -304,13 +216,19 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
params.inv_it / params.decay_n, 0.1, staircase=True)
else:
lrate = tf.constant(params.lr)
+
# trained_params = [label, latent, encoding]
trained_params = [latent, encoding]
+
optimizer = tf.train.AdamOptimizer(learning_rate=lrate, beta1=0.9, beta2=0.999)
inv_train_op = optimizer.minimize(inv_loss, var_list=trained_params,
global_step=inv_step)
reinit_optimizer = tf.variables_initializer(optimizer.variables())
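+ # Separate Adam instance for the quadrant-loss phase, stepped at the fixed lr_quad rate.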
+ optimizer_quad = tf.train.AdamOptimizer(learning_rate=params.lr_quad, beta1=0.9, beta2=0.999)
+ inv_train_op_quad = optimizer_quad.minimize(inv_loss_quad, var_list=trained_params, global_step=inv_step)
+ reinit_optimizer_quad = tf.variables_initializer(optimizer_quad.variables())
+
# --------------------------
# Noise source.
# --------------------------
@@ -394,15 +312,20 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
sess.run([
encoding.assign(gen_encoding),
reinit_optimizer,
+ reinit_optimizer_quad,
])
# Main optimization loop.
print("Beginning dense iteration...")
for _ in range(params.inv_it):
- _inv_loss, _mse_loss, _feat_loss,\
- _lrate, _ssim_loss, _ = sess.run([inv_loss, mse_loss, feat_loss,
- lrate, ssim_loss, inv_train_op])
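+ # Phase switch: full-image loss for the first 75% of iterations, then the quadrant loss at lr_quad.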
+ if it < params.inv_it * 0.75:
+ _inv_loss, _mse_loss, _feat_loss,\
+ _lrate, _ = sess.run([inv_loss, mse_loss, feat_loss,
+ lrate, inv_train_op])
+ else:
+ _inv_loss, _mse_loss, _feat_loss, _ = sess.run([inv_loss_quad, mse_loss, feat_loss_quad, inv_train_op_quad])
+ _lrate = params.lr_quad
if params.clipping or params.stochastic_clipping:
sess.run(clip_latent)
@@ -412,9 +335,9 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
# Log losses.
etime = time.time() - start_time
print('It [{:8d}] time [{:5.1f}] total [{:.4f}] mse [{:.4f}] '
- 'feat [{:.4f}] ssim [{:.4f}] '
+ 'feat [{:.4f}] '
'lr [{:.4f}]'.format(it, etime, _inv_loss, _mse_loss,
- _feat_loss, _ssim_loss, _lrate))
+ _feat_loss, _lrate))
sys.stdout.flush()
@@ -476,3 +399,36 @@ def find_dense_embedding_for_images(params, opt_tag="inverse_" + timestamp(), op
print('End of inversion.')
out_file.close()
sess.close()
+
+def feature_loss(img_a, img_b, y, x, height, width, feature_extractor, opt_feature_layers):
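+ # With an explicit (y, x) offset, compare features on a crop; with y=None, compare on the resized full image.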
+ if y is not None:
+ img_a = tf.image.crop_to_bounding_box(img_a, y, x, height, width)
+ img_b = tf.image.crop_to_bounding_box(img_b, y, x, height, width)
+ else:
+ img_a = tf.image.resize_images(img_a, [height, width])
+ img_b = tf.image.resize_images(img_b, [height, width])
+
+ gen_feat_ex = feature_extractor(dict(images=img_a), as_dict=True, signature='image_feature_vector')
+ target_feat_ex = feature_extractor(dict(images=img_b), as_dict=True, signature='image_feature_vector')
+
+ feat_loss = tf.constant(0.0)
+ img_feat_err = tf.constant(0.0)
+
+ if isinstance(opt_feature_layers, str):
+ opt_feature_layers = opt_feature_layers.split(',')
+ fixed_layers = []
+ for layer in opt_feature_layers:
+ if ',' in layer:
+ fixed_layers += layer.split(',')
+ else:
+ fixed_layers.append(layer)
+
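+ # Accumulate mean squared feature differences over the selected extractor layers.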
+ for layer in fixed_layers:
+ if layer in feature_layer_names:
+ layer_name = feature_layer_names[layer]
+ gen_feat = gen_feat_ex[layer_name]
+ target_feat = target_feat_ex[layer_name]
+ feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1])
+ feat_loss += tf.reduce_mean(feat_square_diff) / len(fixed_layers)
+ img_feat_err += tf.reduce_mean(feat_square_diff, axis=1) / len(fixed_layers)
+ return feat_loss, img_feat_err
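For orientation, a rough sketch (not part of the patch) of how the new pieces compose: the quadrant loss sums feature_loss over the four corner crops, and the optimization loop runs the full-image objective for the first 75% of inv_it iterations before switching to the quadrant objective at the fixed lr_quad rate. quadrant_feature_loss below is a hypothetical helper written against the feature_loss signature introduced above, assuming square images of side img_w.

    # Hypothetical helper, assuming square (img_w x img_w) images and the
    # feature_loss(img_a, img_b, y, x, height, width, extractor, layers)
    # signature added in this patch.
    def quadrant_feature_loss(gen, tgt, img_w, height, width, extractor, layers):
        # Corner offsets: top-left, bottom-left, top-right, bottom-right.
        offsets = [(0, 0), (img_w - height, 0),
                   (0, img_w - width), (img_w - height, img_w - width)]
        total_loss, total_err = 0.0, 0.0
        for y, x in offsets:
            loss, err = feature_loss(gen, tgt, y, x, height, width, extractor, layers)
            total_loss += loss
            total_err += err
        return total_loss, total_err

    # Phase schedule used in the optimization loop: 'full' runs inv_train_op
    # at the (possibly decayed) lr; 'quad' runs inv_train_op_quad at lr_quad.
    def phase(it, inv_it):
        return 'full' if it < inv_it * 0.75 else 'quad'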