diff options
Diffstat (limited to 'cli/app/commands/biggan/search.py')
| -rw-r--r-- | cli/app/commands/biggan/search.py | 131 |
1 file changed, 38 insertions, 93 deletions
diff --git a/cli/app/commands/biggan/search.py b/cli/app/commands/biggan/search.py index e764487..f1cf385 100644 --- a/cli/app/commands/biggan/search.py +++ b/cli/app/commands/biggan/search.py @@ -8,7 +8,6 @@ from os.path import join import time import numpy as np import random -from scipy.stats import truncnorm from subprocess import call import cv2 as cv from PIL import Image @@ -16,87 +15,22 @@ from glob import glob import tensorflow as tf import tensorflow_hub as hub import shutil +import h5py -def image_to_uint8(x): - """Converts [-1, 1] float array to [0, 255] uint8.""" - x = np.asarray(x) - x = (256. / 2.) * (x + 1.) - x = np.clip(x, 0, 255) - x = x.astype(np.uint8) - return x - -def truncated_z_sample(batch_size, z_dim, truncation): - values = truncnorm.rvs(-2, 2, size=(batch_size, z_dim)) - return truncation * values - -def truncated_z_single(z_dim, truncation): - values = truncnorm.rvs(-2, 2, size=(1, z_dim)) - return truncation * values - -def create_labels(batch_size, vocab_size, num_classes): - label = np.zeros((batch_size, vocab_size)) - for i in range(batch_size): - for _ in range(random.randint(1, num_classes)): - j = random.randint(0, vocab_size-1) - label[i, j] = random.random() - label[i] /= label[i].sum() - return label - -def imconvert_uint8(im): - im = np.clip(((im + 1) / 2.0) * 256, 0, 255) - im = np.uint8(im) - return im - -def imconvert_float32(im): - im = np.float32(im) - im = (im / 256) * 2.0 - 1 - return im - -def imread(filename): - img = cv.imread(filename, cv.IMREAD_UNCHANGED) - if img is not None: - if len(img.shape) > 2: - img = img[...,::-1] - return img - -def imwrite(filename, img): - if img is not None: - if len(img.shape) > 2: - img = img[...,::-1] - return cv.imwrite(filename, img) - -def imgrid(imarray, cols=5, pad=1): - if imarray.dtype != np.uint8: - raise ValueError('imgrid input imarray must be uint8') - pad = int(pad) - assert pad >= 0 - cols = int(cols) - assert cols >= 1 - N, H, W, C = imarray.shape - rows = 
int(np.ceil(N / float(cols))) - batch_pad = rows * cols - N - assert batch_pad >= 0 - post_pad = [batch_pad, pad, pad, 0] - pad_arg = [[0, p] for p in post_pad] - imarray = np.pad(imarray, pad_arg, 'constant', constant_values=255) - H += pad - W += pad - grid = (imarray - .reshape(rows, cols, H, W, C) - .transpose(0, 2, 1, 3, 4) - .reshape(rows*H, cols*W, C)) - if pad: - grid = grid[:-pad, :-pad] - return grid +from app.search.image import image_to_uint8, imconvert_uint8, imconvert_float32, \ + imread, imwrite, imgrid +from app.search.vector import truncated_z_sample, truncated_z_single, create_labels @click.command('') @click.option('-i', '--input', 'opt_fp_in', required=True, help='Path to input image') @click.option('-s', '--dims', 'opt_dims', default=128, type=int, help='Dimensions of BigGAN network (128, 256, 512)') +@click.option('-v', '--video', 'opt_video', is_flag=True, + help='Export a video for each dataset') # @click.option('-r', '--recursive', 'opt_recursive', is_flag=True) @click.pass_context -def cli(ctx, opt_fp_in, opt_dims): +def cli(ctx, opt_fp_in, opt_dims, opt_video): """ Search for an image in BigGAN using gradient descent """ @@ -109,6 +43,9 @@ def cli(ctx, opt_fp_in, opt_dims): input_trunc = inputs['truncation'] output = generator(inputs) + z_dim = input_z.shape.as_list()[1] + vocab_size = input_y.shape.as_list()[1] + sess = tf.compat.v1.Session() sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.tables_initializer()) @@ -117,12 +54,24 @@ def cli(ctx, opt_fp_in, opt_dims): paths = glob(os.path.join(opt_fp_in, '*.jpg')) + \ glob(os.path.join(opt_fp_in, '*.jpeg')) + \ glob(os.path.join(opt_fp_in, '*.png')) - for path in paths: - find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, path, opt_dims) else: - find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, opt_fp_in, opt_dims) + paths = [opt_fp_in] + + fp_inverses = os.path.join(app_cfg.INVERSES_DIR, params.dataset_out) + 
os.makedirs(fp_inverses, exist_ok=True) + out_file = h5py.File(fp_inverses, 'w') + out_images = out_file.create_dataset('xtrain', [len(paths), 3, 512, 512], dtype='float32') + out_labels = out_file.create_dataset('ytrain', [len(paths), vocab_size], dtype='float32') + + for index, path in enumerate(paths): + fp_frames = find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, path, opt_dims, out_images, out_labels, index) + if opt_video: + export_video(fp_frames) -def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, opt_fp_in, opt_dims): +def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, opt_fp_in, opt_dims, out_images, out_labels, index): + """ + Find the closest latent and class vectors for an image. Store the class vector in an HDF5. + """ z_dim = input_z.shape.as_list()[1] vocab_size = input_y.shape.as_list()[1] @@ -143,18 +92,14 @@ def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, fp_frames = "frames_{}_{}".format(fbase, int(time.time() * 1000)) os.makedirs(join(app_cfg.DIR_OUTPUTS, fp_frames), exist_ok=True) target_im = imread(opt_fp_in) - w = target_im.shape[1] - h = target_im.shape[0] - if w <= h: - scale = opt_dims / w - else: - scale = opt_dims / h - #print("{} {}".format(w, h)) - target_im = cv.resize(target_im,(0,0), fx=scale, fy=scale) - phi_target = imconvert_float32(target_im) - phi_target = phi_target[:opt_dims,:opt_dims] - if phi_target.shape[2] == 4: - phi_target = phi_target[:,:,1:4] + # crop image to 512 and save for later processing + phi_target_for_inversion = resize_and_crop_image(target_im, 512) + b = np.dsplit(phi_target_for_inversion, 3) + phi_target_for_inversion = np.stack(b).reshape((3, 512, 512)) + out_images[index] = phi_target_for_inversion + + # crop image to 128 to find vectors + phi_target = resize_and_crop_image(target_im, opt_dims) phi_target = np.expand_dims(phi_target, 0) phi_target = np.repeat(phi_target, batch_size, 
axis=0) else: @@ -211,11 +156,12 @@ def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, imwrite(join(app_cfg.DIR_OUTPUTS, fp_frames, 'frame_{:04d}.png'.format(i)), guess_im) if i % 20 == 0: print('lr: {}, iter: {}, grad_z: {}, grad_y: {}'.format(lr_z, i, np.std(grad_z), np.std(grad_y))) - #print('lr: {}, iter: {}, grad_z: {}'.format(lr, i, np.std(grad_z))) - #print('lr: {}, iter: {}, grad_y: {}'.format(lr, i, np.std(grad_y))) except KeyboardInterrupt: pass + out_labels[index] = y + return fp_frames +def export_video(fp_frames): print("Exporting video...") cmd = [ '/home/lens/bin/ffmpeg', @@ -225,7 +171,6 @@ def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, '-pix_fmt', 'yuv420p', join(app_cfg.DIR_OUTPUTS, fp_frames + '.mp4') ] - print(' '.join(cmd)) + # print(' '.join(cmd)) call(cmd) shutil.rmtree(join(app_cfg.DIR_OUTPUTS, fp_frames)) - |
