summary | refs | log | tree | commit | diff
path: root/cli/app/commands
diff options
context:
space:
mode:
Diffstat (limited to 'cli/app/commands')
-rw-r--r-- cli/app/commands/biggan/search.py 131
1 files changed, 38 insertions, 93 deletions
diff --git a/cli/app/commands/biggan/search.py b/cli/app/commands/biggan/search.py
index e764487..f1cf385 100644
--- a/cli/app/commands/biggan/search.py
+++ b/cli/app/commands/biggan/search.py
@@ -8,7 +8,6 @@ from os.path import join
import time
import numpy as np
import random
-from scipy.stats import truncnorm
from subprocess import call
import cv2 as cv
from PIL import Image
@@ -16,87 +15,22 @@ from glob import glob
import tensorflow as tf
import tensorflow_hub as hub
import shutil
+import h5py
-def image_to_uint8(x):
- """Converts [-1, 1] float array to [0, 255] uint8."""
- x = np.asarray(x)
- x = (256. / 2.) * (x + 1.)
- x = np.clip(x, 0, 255)
- x = x.astype(np.uint8)
- return x
-
-def truncated_z_sample(batch_size, z_dim, truncation):
- values = truncnorm.rvs(-2, 2, size=(batch_size, z_dim))
- return truncation * values
-
-def truncated_z_single(z_dim, truncation):
- values = truncnorm.rvs(-2, 2, size=(1, z_dim))
- return truncation * values
-
-def create_labels(batch_size, vocab_size, num_classes):
- label = np.zeros((batch_size, vocab_size))
- for i in range(batch_size):
- for _ in range(random.randint(1, num_classes)):
- j = random.randint(0, vocab_size-1)
- label[i, j] = random.random()
- label[i] /= label[i].sum()
- return label
-
-def imconvert_uint8(im):
- im = np.clip(((im + 1) / 2.0) * 256, 0, 255)
- im = np.uint8(im)
- return im
-
-def imconvert_float32(im):
- im = np.float32(im)
- im = (im / 256) * 2.0 - 1
- return im
-
-def imread(filename):
- img = cv.imread(filename, cv.IMREAD_UNCHANGED)
- if img is not None:
- if len(img.shape) > 2:
- img = img[...,::-1]
- return img
-
-def imwrite(filename, img):
- if img is not None:
- if len(img.shape) > 2:
- img = img[...,::-1]
- return cv.imwrite(filename, img)
-
-def imgrid(imarray, cols=5, pad=1):
- if imarray.dtype != np.uint8:
- raise ValueError('imgrid input imarray must be uint8')
- pad = int(pad)
- assert pad >= 0
- cols = int(cols)
- assert cols >= 1
- N, H, W, C = imarray.shape
- rows = int(np.ceil(N / float(cols)))
- batch_pad = rows * cols - N
- assert batch_pad >= 0
- post_pad = [batch_pad, pad, pad, 0]
- pad_arg = [[0, p] for p in post_pad]
- imarray = np.pad(imarray, pad_arg, 'constant', constant_values=255)
- H += pad
- W += pad
- grid = (imarray
- .reshape(rows, cols, H, W, C)
- .transpose(0, 2, 1, 3, 4)
- .reshape(rows*H, cols*W, C))
- if pad:
- grid = grid[:-pad, :-pad]
- return grid
+from app.search.image import image_to_uint8, imconvert_uint8, imconvert_float32, \
+ imread, imwrite, imgrid
+from app.search.vector import truncated_z_sample, truncated_z_single, create_labels
@click.command('')
@click.option('-i', '--input', 'opt_fp_in', required=True,
help='Path to input image')
@click.option('-s', '--dims', 'opt_dims', default=128, type=int,
help='Dimensions of BigGAN network (128, 256, 512)')
+@click.option('-v', '--video', 'opt_video', is_flag=True,
+ help='Export a video for each dataset')
# @click.option('-r', '--recursive', 'opt_recursive', is_flag=True)
@click.pass_context
-def cli(ctx, opt_fp_in, opt_dims):
+def cli(ctx, opt_fp_in, opt_dims, opt_video):
"""
Search for an image in BigGAN using gradient descent
"""
@@ -109,6 +43,9 @@ def cli(ctx, opt_fp_in, opt_dims):
input_trunc = inputs['truncation']
output = generator(inputs)
+ z_dim = input_z.shape.as_list()[1]
+ vocab_size = input_y.shape.as_list()[1]
+
sess = tf.compat.v1.Session()
sess.run(tf.compat.v1.global_variables_initializer())
sess.run(tf.compat.v1.tables_initializer())
@@ -117,12 +54,24 @@ def cli(ctx, opt_fp_in, opt_dims):
paths = glob(os.path.join(opt_fp_in, '*.jpg')) + \
glob(os.path.join(opt_fp_in, '*.jpeg')) + \
glob(os.path.join(opt_fp_in, '*.png'))
- for path in paths:
- find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, path, opt_dims)
else:
- find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, opt_fp_in, opt_dims)
+ paths = [opt_fp_in]
+
+ fp_inverses = os.path.join(app_cfg.INVERSES_DIR, params.dataset_out)
+ os.makedirs(os.path.dirname(fp_inverses), exist_ok=True) # create only the parent directory; fp_inverses itself is the HDF5 file path
+ out_file = h5py.File(fp_inverses, 'w')
+ out_images = out_file.create_dataset('xtrain', [len(paths), 3, 512, 512], dtype='float32')
+ out_labels = out_file.create_dataset('ytrain', [len(paths), vocab_size], dtype='float32')
+
+ for index, path in enumerate(paths): # enumerate yields (index, item), in that order
+ fp_frames = find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, path, opt_dims, out_images, out_labels, index)
+ if opt_video:
+ export_video(fp_frames)
-def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, opt_fp_in, opt_dims):
+def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output, opt_fp_in, opt_dims, out_images, out_labels, index):
+ """
+ Find the closest latent and class vectors for an image. Store the class vector in an HDF5.
+ """
z_dim = input_z.shape.as_list()[1]
vocab_size = input_y.shape.as_list()[1]
@@ -143,18 +92,14 @@ def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output,
fp_frames = "frames_{}_{}".format(fbase, int(time.time() * 1000))
os.makedirs(join(app_cfg.DIR_OUTPUTS, fp_frames), exist_ok=True)
target_im = imread(opt_fp_in)
- w = target_im.shape[1]
- h = target_im.shape[0]
- if w <= h:
- scale = opt_dims / w
- else:
- scale = opt_dims / h
- #print("{} {}".format(w, h))
- target_im = cv.resize(target_im,(0,0), fx=scale, fy=scale)
- phi_target = imconvert_float32(target_im)
- phi_target = phi_target[:opt_dims,:opt_dims]
- if phi_target.shape[2] == 4:
- phi_target = phi_target[:,:,1:4]
+ # crop image to 512 and save for later processing
+ phi_target_for_inversion = resize_and_crop_image(target_im, 512)
+ b = np.dsplit(phi_target_for_inversion, 3)
+ phi_target_for_inversion = np.stack(b).reshape((3, 512, 512))
+ out_images[index] = phi_target_for_inversion
+
+ # crop image to 128 to find vectors
+ phi_target = resize_and_crop_image(target_im, opt_dims)
phi_target = np.expand_dims(phi_target, 0)
phi_target = np.repeat(phi_target, batch_size, axis=0)
else:
@@ -211,11 +156,12 @@ def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output,
imwrite(join(app_cfg.DIR_OUTPUTS, fp_frames, 'frame_{:04d}.png'.format(i)), guess_im)
if i % 20 == 0:
print('lr: {}, iter: {}, grad_z: {}, grad_y: {}'.format(lr_z, i, np.std(grad_z), np.std(grad_y)))
- #print('lr: {}, iter: {}, grad_z: {}'.format(lr, i, np.std(grad_z)))
- #print('lr: {}, iter: {}, grad_y: {}'.format(lr, i, np.std(grad_y)))
except KeyboardInterrupt:
pass
+ out_labels[index] = y
+ return fp_frames
+def export_video(fp_frames):
print("Exporting video...")
cmd = [
'/home/lens/bin/ffmpeg',
@@ -225,7 +171,6 @@ def find_nearest_vector(generator, sess, input_z, input_y, input_trunc, output,
'-pix_fmt', 'yuv420p',
join(app_cfg.DIR_OUTPUTS, fp_frames + '.mp4')
]
- print(' '.join(cmd))
+ # print(' '.join(cmd))
call(cmd)
shutil.rmtree(join(app_cfg.DIR_OUTPUTS, fp_frames))
-