-rw-r--r--  cli/app/commands/biggan/extract_dense_vectors.py | 20
-rw-r--r--  cli/app/commands/biggan/search_class.py          | 22
-rw-r--r--  cli/app/search/search_class.py                   | 93
-rw-r--r--  cli/app/search/search_dense.py                   |  7
4 files changed, 120 insertions(+), 22 deletions(-)
diff --git a/cli/app/commands/biggan/extract_dense_vectors.py b/cli/app/commands/biggan/extract_dense_vectors.py
index 2436ce6..7dc25bc 100644
--- a/cli/app/commands/biggan/extract_dense_vectors.py
+++ b/cli/app/commands/biggan/extract_dense_vectors.py
@@ -15,8 +15,19 @@ from app.search.json import params_dense_dict
   help='Number of optimization iterations')
 @click.option('-v', '--video', 'opt_video', is_flag=True,
   help='Export a video for each dataset')
+@click.option('-sc', '--stochastic_clipping', 'opt_stochastic_clipping', default=0,
+  help='Stochastically clip the latent vector every N steps')
+@click.option('-lc', '--label_clipping', 'opt_label_clipping', default=0,
+  help='Normalize labels every N steps')
+@click.option('-feat', '--use_feature_detector', 'opt_use_feature_detector', is_flag=True,
+  help='Compute feature loss')
+@click.option('-ll', '--feature_layers', 'opt_feature_layers', default="1a,2a,4a,7a",
+  help='Feature layers used for loss')
+@click.option('-snap', '--snapshot_interval', 'opt_snapshot_interval', default=20,
+  help='Interval to store sample images')
 @click.pass_context
-def cli(ctx, opt_folder_id, opt_latent_steps, opt_dense_steps, opt_video):
+def cli(ctx, opt_folder_id, opt_latent_steps, opt_dense_steps, opt_video,
+  opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval):
   """
   The full process:
   - Fetch new images from the cortex
@@ -42,7 +53,12 @@ def cli(ctx, opt_folder_id, opt_latent_steps, opt_dense_steps, opt_video):
     opt_steps=opt_dense_steps,
     opt_video=opt_video,
     opt_tag=tag,
-    opt_limit=-1
+    opt_limit=-1,
+    opt_stochastic_clipping=opt_stochastic_clipping,
+    opt_label_clipping=opt_label_clipping,
+    opt_use_feature_detector=opt_use_feature_detector,
+    opt_feature_layers=opt_feature_layers,
+    opt_snapshot_interval=opt_snapshot_interval
   )
 
   params = params_dense_dict(tag)
diff --git a/cli/app/commands/biggan/search_class.py b/cli/app/commands/biggan/search_class.py
index 311dc70..fc27935 100644
--- a/cli/app/commands/biggan/search_class.py
+++ b/cli/app/commands/biggan/search_class.py
@@ -13,11 +13,24 @@ from app.search.search_class import find_nearest_vector_for_images
   help='Limit the number of images to process')
 @click.option('-v', '--video', 'opt_video', is_flag=True,
   help='Export a video for each dataset')
+@click.option('-d', '--dims', 'opt_dims', default=512, type=int,
+  help='Dimensions of BigGAN network (128, 256, 512)')
 @click.option('-t', '--tag', 'opt_tag', default='inverse_' + str(int(time.time() * 1000)),
   help='Tag this dataset')
-# @click.option('-r', '--recursive', 'opt_recursive', is_flag=True)
+@click.option('-sc', '--stochastic_clipping', 'opt_stochastic_clipping', default=0,
+  help='Stochastically clip the latent vector every N steps')
+@click.option('-lc', '--label_clipping', 'opt_label_clipping', default=0,
+  help='Normalize labels every N steps')
+@click.option('-feat', '--use_feature_detector', 'opt_use_feature_detector', is_flag=True,
+  help='Compute feature loss')
+@click.option('-ll', '--feature_layers', 'opt_feature_layers', default="1a,2a,4a,7a",
+  help='Feature layers used for loss')
+@click.option('-snap', '--snapshot_interval', 'opt_snapshot_interval', default=20,
+  help='Interval to store sample images')
+
 @click.pass_context
-def cli(ctx, opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag):
+def cli(ctx, opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag,
+  opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval):
   """ Search for an image (class vector) in BigGAN using
   gradient descent """
@@ -28,4 +41,7 @@ def cli(ctx, opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag):
   else:
     paths = [opt_fp_in]
 
-  find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit)
+  opt_feature_layers = opt_feature_layers.split(',')
+
+  find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit,
+    opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval)
diff --git a/cli/app/search/search_class.py b/cli/app/search/search_class.py
index 7eab4bc..42ff605 100644
--- a/cli/app/search/search_class.py
+++ b/cli/app/search/search_class.py
@@ -24,7 +24,29 @@ from app.search.image import image_to_uint8, imconvert_uint8, imconvert_float32,
 from app.search.vector import truncated_z_sample, truncated_z_single, \
   create_labels, create_labels_uniform
 
-def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit=-1):
+feature_layer_names = {
+  '1a': "InceptionV3/Conv2d_1a_3x3",
+  '2a': "InceptionV3/Conv2d_2a_3x3",
+  '2b': "InceptionV3/Conv2d_2b_3x3",
+  '3a': "InceptionV3/Conv2d_3a_3x3",
+  '3b': "InceptionV3/Conv2d_3b_3x3",
+  '4a': "InceptionV3/Conv2d_4a_3x3",
+  '5b': "InceptionV3/Mixed_5b",
+  '5c': "InceptionV3/Mixed_5c",
+  '5d': "InceptionV3/Mixed_5d",
+  '6a': "InceptionV3/Mixed_6a",
+  '6b': "InceptionV3/Mixed_6b",
+  '6c': "InceptionV3/Mixed_6c",
+  '6d': "InceptionV3/Mixed_6d",
+  '6e': "InceptionV3/Mixed_6e",
+  '7a': "InceptionV3/Mixed_7a",
+  '7b': "InceptionV3/Mixed_7b",
+  '7c': "InceptionV3/Mixed_7c",
+}
+
+def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag,
+    opt_limit=-1, opt_stochastic_clipping=0, opt_label_clipping=0,
+    opt_use_feature_detector=False, opt_feature_layers=('1a', '2a', '4a', '7a'), opt_snapshot_interval=20):
   sess = tf.compat.v1.Session()
   tf.reset_default_graph()
   generator = hub.Module('https://tfhub.dev/deepmind/biggan-512/2')
@@ -42,12 +64,14 @@ def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_ta
     if index == opt_limit:
       break
     out_fns[index] = os.path.basename(path)
-    fp_frames = find_nearest_vector(sess, generator, path, opt_dims, out_images, out_labels, out_latent, opt_steps, index)
+    fp_frames = find_nearest_vector(sess, generator, path, opt_dims, out_images, out_labels, out_latent, opt_steps, index,
+      opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval)
     if opt_video:
       export_video(fp_frames)
 
   sess.close()
 
-def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_labels, out_latent, opt_steps, index):
+def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_labels, out_latent, opt_steps, index,
+    opt_stochastic_clipping, opt_label_clipping, opt_use_feature_detector, opt_feature_layers, opt_snapshot_interval):
   """ Find the closest latent and class vectors for an image. Store the class vector in an HDF5.
""" @@ -74,13 +98,58 @@ def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_la 'truncation': input_trunc, }) + ## clip the Z encoding + + clipped_encoding = tf.where(tf.abs(latent) >= params.clip, + tf.random.uniform([BATCH_SIZE, Z_DIM], minval=-params.clip, maxval=params.clip), input_z) + clip_latent = tf.assign(latent, clipped_encoding) + target = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, img_size, img_size, num_channels)) - # loss = tf.losses.compute_weighted_loss(tf.square(output - target), weights=mask) - loss = tf.compat.v1.losses.mean_squared_error(target, output) + ## normalize the Y encoding + normalized_labels = tf.nn.l2_normalize(input_y) + clip_labels = tf.assign(input_y, normalized_labels) + + ## if computing Feature loss, use these encoders + if opt_use_feature_detector: + pix_square_diff = tf.square((target_img - gen_img) / 2.0) + mse_loss = tf.reduce_mean(pix_square_diff) + + feature_extractor = hub.Module("https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1") + + # Convert images from range [-1, 1] channels_first to [0, 1] channels_last. + gen_img_1 = tf.transpose(output / 2.0 + 0.5, [0, 2, 3, 1]) + target_img_1 = tf.transpose(target / 2.0 + 0.5, [0, 2, 3, 1]) + + # Convert images to appropriate size for feature extraction. + height, width = hub.get_expected_image_size(feature_extractor) + gen_img_1 = tf.image.resize_images(gen_img_1, [height, width]) + target_img_1 = tf.image.resize_images(target_img_1, [height, width]) + + gen_feat_ex = feature_extractor(dict(images=gen_img_1), as_dict=True, signature='image_feature_vector') + target_feat_ex = feature_extractor(dict(images=target_img_1), as_dict=True, signature='image_feature_vector') - train_step_z = tf.train.AdamOptimizer(z_lr).minimize(loss, var_list=[input_z], name='AdamOpterZ') - train_step_y = tf.train.AdamOptimizer(y_lr).minimize(loss, var_list=[input_y], name='AdamOpterY') + feat_loss = tf.constant(0.0) + + for layer in opt_feature_layers: + layer_name = feature_layer_names[layer] + gen_feat = gen_feat_ex[layer_name] + target_feat = target_feat_ex[layer_name] + feat_square_diff = tf.reshape(tf.square(gen_feat - target_feat), [BATCH_SIZE, -1]) + feat_loss += tf.reduce_mean(feat_square_diff) / len(opt_feature_layers) + + # Batch reconstruction error. 
+    inv_loss = 1.0 * mse_loss + 1.0 * feat_loss
+
+    train_step_z = tf.train.AdamOptimizer(learning_rate=z_lr, beta1=0.9, beta2=0.999) \
+      .minimize(inv_loss, var_list=[input_z])
+    train_step_y = tf.train.AdamOptimizer(learning_rate=y_lr, beta1=0.9, beta2=0.999) \
+      .minimize(inv_loss, var_list=[input_y])
+    loss = inv_loss
+  else:
+    loss = tf.compat.v1.losses.mean_squared_error(target, output)
+    train_step_z = tf.train.AdamOptimizer(z_lr).minimize(loss, var_list=[input_z], name='AdamOpterZ')
+    train_step_y = tf.train.AdamOptimizer(y_lr).minimize(loss, var_list=[input_y], name='AdamOpterY')
 
   target_im, fp_frames = load_target_image(opt_fp_in)
 
@@ -106,12 +175,16 @@ def find_nearest_vector(sess, generator, opt_fp_in, opt_dims, out_images, out_la
     print("Preparing to iterate...")
     for i in range(opt_steps):
       curr_loss, _, _ = sess.run([loss, train_step_z, train_step_y], feed_dict=feed_dict)
-      if i % 20 == 0:
+      print('iter: {}, loss: {}'.format(i, curr_loss))
+      if opt_stochastic_clipping != 0 and i % opt_stochastic_clipping == 0:
+        sess.run(clip_latent)
+      if opt_label_clipping != 0 and i % opt_label_clipping == 0:
+        sess.run(clip_labels)
+      if opt_snapshot_interval != 0 and i % opt_snapshot_interval == 0:
         phi_guess = sess.run(output)
         guess_im = imgrid(imconvert_uint8(phi_guess), cols=1)
-        imwrite(join(app_cfg.DIR_OUTPUTS, fp_frames, 'frame_{:04d}.png'.format(int(i / 20))), guess_im)
-        print('iter: {}, loss: {}'.format(i, curr_loss))
+        imwrite(join(app_cfg.DIR_OUTPUTS, fp_frames, 'frame_{:04d}.png'.format(int(i / opt_snapshot_interval))), guess_im)
   except KeyboardInterrupt:
     pass
diff --git a/cli/app/search/search_dense.py b/cli/app/search/search_dense.py
index a538d0d..8ba3340 100644
--- a/cli/app/search/search_dense.py
+++ b/cli/app/search/search_dense.py
@@ -80,16 +80,9 @@ def find_dense_embedding_for_images(params):
   os.makedirs(SAMPLES_DIR, exist_ok=True)
   os.makedirs(VECTOR_DIR, exist_ok=True)
 
-  # --------------------------
-  # Util functions.
-  # --------------------------
-
   # One hot encoding for classes.
   def one_hot(values):
     return np.eye(N_CLASS)[values]
 
-  # --------------------------
-  # Logging.
-  # --------------------------
   summary_writer = tf.summary.FileWriter(LOGS_DIR)
   def log_stats(name, val, it):
     summary = tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=val)])
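
For reference, a rough standalone sketch of what the new -sc (stochastic clipping) and -lc (label normalization) options do on every N-th optimization step. It is plain NumPy with illustrative names and an assumed clip threshold of 1.0 (the diff reads the threshold from params.clip); in the diff these operations are built once as tf.assign nodes (clip_latent, clip_labels) and triggered with sess.run() inside the training loop.

    import numpy as np

    def stochastic_clip(z, clip=1.0):
        # Replace latent components that drifted outside [-clip, clip]
        # with fresh uniform samples; in-range components are untouched.
        resampled = np.random.uniform(-clip, clip, size=z.shape)
        return np.where(np.abs(z) >= clip, resampled, z)

    def normalize_labels(y):
        # L2-normalize the class vector so its magnitude stays bounded.
        return y / max(np.linalg.norm(y), 1e-12)

    z = np.random.standard_normal((1, 128))  # biggan-512 latent (z dim 128)
    y = np.random.random((1, 1000))          # class vector over 1000 ImageNet classes
    for step in range(500):
        # ... one gradient step on z and y would go here ...
        if step % 10 == 0:                   # e.g. -sc 10 / -lc 10
            z = stochastic_clip(z)
            y = normalize_labels(y)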

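Assuming the repo's click entry point exposes these commands as a biggan group (the exact invocation path and the input-file flag for search_class are not shown in this diff and are hypothetical here), a run exercising the new options might look like the following; -ll takes the comma-separated short keys that index feature_layer_names:

    # hypothetical invocation; entry-point name and -i/--input flag are assumed
    python run.py biggan search_class -i input/portrait.jpg -d 512 \
      -sc 10 -lc 10 -feat -ll 1a,2a,4a,7a -snap 20 -v -t inverse_demo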