biggan

author: Jules Laplace <julescarbon@gmail.com> 2019-12-05 17:01:44 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-12-05 17:01:44 +0100
commit: d9ceb77e8700312c554cd3205d0c8db775db00c2 (patch)
tree: 70c0f0e177a7104988fb8b56698a59fc4886d5aa
parent: 1d3c7428068c46568638db5ab547c8aeb2308b57 (diff)
6 files changed, 268 insertions, 91 deletions
diff --git a/cli/app/commands/process/fetch.py b/cli/app/commands/bigbigan/fetch.py
index 5b6c102..5b6c102 100644
--- a/cli/app/commands/process/fetch.py
+++ b/cli/app/commands/bigbigan/fetch.py
diff --git a/cli/app/commands/bigbigan/random.py b/cli/app/commands/bigbigan/random.py
new file mode 100644
index 0000000..a1fd65f
--- /dev/null
+++ b/cli/app/commands/bigbigan/random.py
@@ -0,0 +1,46 @@
+import click
+
+from app.utils import click_utils
+from app.settings import app_cfg
+
+from os.path import join
+import time
+import numpy as np
+
+from PIL import Image
+
+def image_to_uint8(x):
+  """Map a float array in [-1, 1] onto uint8 pixel values in [0, 255]."""
+  pixels = np.asarray(x)
+  # Scale so -1 -> 0 and +1 -> 256; the clip below caps the top at 255.
+  pixels = (pixels + 1.) * (256. / 2.)
+  pixels = np.clip(pixels, 0, 255).astype(np.uint8)
+  return pixels
+
+@click.command('')
+# @click.option('-i', '--input', 'opt_dir_in', required=True, 
+#   help='Path to input image glob directory')
+# @click.option('-r', '--recursive', 'opt_recursive', is_flag=True)
+@click.pass_context
+def cli(ctx):
+  """Generate random BigBiGAN images and save them as PNGs in DIR_OUTPUTS.
+  """
+  import tensorflow as tf
+  import tensorflow_hub as hub
+
+  print("Loading module...")
+  module = hub.Module('https://tfhub.dev/deepmind/bigbigan-resnet50/1')
+  z = tf.random.normal([8, 120])  # latent samples
+  outputs = module(z, signature='generate', as_dict=True)  # dict of output arrays
+
+  with tf.Session() as sess:
+    sess.run(tf.compat.v1.global_variables_initializer())
+    sess.run(tf.compat.v1.tables_initializer())
+    results = sess.run(outputs)
+
+  for sample in results['default']:  # one entry per latent sample
+    sample = image_to_uint8(sample)
+    img = Image.fromarray(sample, "RGB")
+    fp_img_out = "{}.png".format(int(time.time() * 1000))  # millisecond timestamp
+    img.save(join(app_cfg.DIR_OUTPUTS, fp_img_out))
+
diff --git a/cli/app/commands/biggan/random.py b/cli/app/commands/biggan/random.py
new file mode 100644
index 0000000..42db0b8
--- /dev/null
+++ b/cli/app/commands/biggan/random.py
@@ -0,0 +1,58 @@
+import click
+
+from app.utils import click_utils
+from app.settings import app_cfg
+
+from os.path import join
+import time
+import numpy as np
+
+from PIL import Image
+
+def image_to_uint8(x):
+  """Map a float array in [-1, 1] onto uint8 pixel values in [0, 255]."""
+  pixels = np.asarray(x)
+  # Scale so -1 -> 0 and +1 -> 256; the clip below caps the top at 255.
+  pixels = (pixels + 1.) * (256. / 2.)
+  pixels = np.clip(pixels, 0, 255).astype(np.uint8)
+  return pixels
+
+@click.command('')
+# @click.option('-i', '--input', 'opt_dir_in', required=True, 
+#   help='Path to input image glob directory')
+# @click.option('-r', '--recursive', 'opt_recursive', is_flag=True)
+@click.pass_context
+def cli(ctx):
+  """
+  Generate a batch of random BigGAN images and save them as PNGs in DIR_OUTPUTS
+  """
+  import tensorflow as tf
+  import tensorflow_hub as hub
+
+  print("Loading module...")
+  module = hub.Module('https://tfhub.dev/deepmind/biggan-128/2')
+  # module = hub.Module('https://tfhub.dev/deepmind/biggan-256/2')
+  # module = hub.Module('https://tfhub.dev/deepmind/biggan-512/2')
+
+  batch_size = 8
+  truncation = 0.5  # scalar truncation value in [0.02, 1.0]
+
+  z = truncation * tf.random.truncated_normal([batch_size, 120])  # noise sample
+
+  y_index = tf.random.uniform([batch_size], maxval=1000, dtype=tf.int32)
+  y = tf.one_hot(y_index, 1000)
+
+  # as_dict=True makes the module return {'default': images} instead of a bare
+  # tensor, which is what the results['default'] lookup below relies on.
+  outputs = module(dict(y=y, z=z, truncation=truncation), as_dict=True)
+
+  with tf.Session() as sess:
+    sess.run(tf.compat.v1.global_variables_initializer())
+    sess.run(tf.compat.v1.tables_initializer())
+    results = sess.run(outputs)
+
+  for sample in results['default']:
+    sample = image_to_uint8(sample)
+    img = Image.fromarray(sample, "RGB")
+    fp_img_out = "{}.png".format(int(time.time() * 1000))
+    img.save(join(app_cfg.DIR_OUTPUTS, fp_img_out))
diff --git a/cli/app/commands/process/random.py b/cli/app/commands/process/random.py
deleted file mode 100644
index a1e5aff..0000000
--- a/cli/app/commands/process/random.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import click
-
-from app.utils import click_utils
-from app.settings import app_cfg
-
-from os.path import join
-import time
-import numpy as np
-
-from PIL import Image
-
-def image_to_uint8(x):
-  """Converts [-1, 1] float array to [0, 255] uint8."""
-  x = np.asarray(x)
-  x = (256. / 2.) * (x + 1.)
-  x = np.clip(x, 0, 255)
-  x = x.astype(np.uint8)
-  return x
-
-@click.command('')
-# @click.option('-i', '--input', 'opt_dir_in', required=True, 
-#   help='Path to input image glob directory')
-# @click.option('-r', '--recursive', 'opt_recursive', is_flag=True)
-@click.pass_context
-def cli(ctx):
-  """
-  """
-  print("Loading Tensorflow....")
-  import tensorflow as tf
-  import tensorflow_hub as hub
-
-  #tf.compat.v1.disable_eager_execution()
-  #g = tf.compat.v1.get_default_graph()
-
-  # Sample a batch of 8 random latent vectors (z) from the Gaussian prior. Then
-  # call the generator on the latent samples to generate a batch of images with
-  # shape [8, 128, 128, 3] and range [-1, 1].
-  #recons = module(z, signature='generate', as_dict=True)['upsampled']
-
-  #info = module.get_input_info_dict('encode')['x']
-  #enc_ph = tf.placeholder(dtype=info.dtype, shape=info.get_shape())
-
-  #z = bigbigan.encode(enc_ph, return_all_features=True)['z_mean']
-  #recons = bigbigan.generate(z, upsample=True)
-  #recons = outputs['upsampled']
-
-  #if return_all_features else outputs['z_sample']
-
-  #fp_img_out = "{}.png".format(int(time.time() * 1000))
-  print("Loading module...")
-  module = hub.Module('https://tfhub.dev/deepmind/bigbigan-resnet50/1')
-  z = tf.random.normal([8, 120])  # latent samples
-  outputs = module(z, signature='generate', as_dict=True)
-
-  with tf.Session() as sess:
-    sess.run(tf.compat.v1.global_variables_initializer())
-    sess.run(tf.compat.v1.tables_initializer())
-    results = sess.run(outputs)
-
-  for sample in results['upsampled']:
-    sample = image_to_uint8(sample)
-    img = Image.fromarray(sample, "RGB")
-    fp_img_out = "{}.png".format(int(time.time() * 1000))
-    img.save(join(app_cfg.DIR_OUTPUTS, fp_img_out))
-    #print(result)
-
-  #tf.keras.preprocessing.image.save_img(
-  #  join(app_cfg.DIR_OUTPUTS, fp_img_out),
-  #  gen_samples,
-  #)
-  #with tf.Session() as sess:
-  #  gen_samples = gen_samples.eval()
-  #  print(gen_samples)
-
-  # # Given a batch of 256x256 RGB images in range [-1, 1], call the encoder to
-  # # compute predicted latents z and other features (e.g. for use in downstream
-  # # recognition tasks).
-  # images = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
-  # features = module(images, signature='encode', as_dict=True)
-
-  # # Get the predicted latent sample `z_sample` from the dict of features.
-  # # Other available features include `avepool_feat` and `bn_crelu_feat`, used in
-  # # the representation learning results.
-  # z_sample = features['z_sample']  # shape [?, 120]
-
-  # # Compute reconstructions of the input `images` by passing the encoder's output
-  # # `z_sample` back through the generator. Note that raw generator outputs are
-  # # half the resolution of encoder inputs (128x128). To get upsampled generator
-  # # outputs matching the encoder input resolution (256x256), instead use:
-  # #     recons = module(z_sample, signature='generate', as_dict=True)['upsampled']
-  # recons = module(z_sample, signature='generate')  # shape [?, 128, 128, 3]
diff --git a/cli/app/search/search_km.py b/cli/app/search/search_km.py
new file mode 100644
index 0000000..bdffbe4
--- /dev/null
+++ b/cli/app/search/search_km.py
@@ -0,0 +1,86 @@
+import cStringIO
+import numpy as np
+import PIL.Image
+from scipy.stats import truncnorm
+import tensorflow as tf
+import tensorflow_hub as hub
+import cv2
+
+module_path = 'https://tfhub.dev/deepmind/biggan-128/2'  # 128x128 BigGAN
+# module_path = 'https://tfhub.dev/deepmind/biggan-256/2'  # 256x256 BigGAN
+# module_path = 'https://tfhub.dev/deepmind/biggan-512/2'  # 512x512 BigGAN
+
+tf.reset_default_graph()
+module = hub.Module(module_path)
+# One placeholder per module input; .items() works on Python 2 and 3 alike.
+inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)
+          for k, v in module.get_input_info_dict().items()}
+output = module(inputs)
+
+input_z = inputs['z']
+input_y = inputs['y']
+input_trunc = inputs['truncation']
+
+dim_z = input_z.shape.as_list()[1]  # second dimension of the z input
+vocab_size = input_y.shape.as_list()[1]  # second dimension of the y input
+initializer = tf.global_variables_initializer()
+sess = tf.Session()
+sess.run(initializer)
+
+y = 259 # pomeranian
+n_samples = 9
+truncation = 0.5
+
+# phi_target = imread(uploaded.keys()[0])
+# phi_target = imconvert_float32(phi_target)
+# phi_target = np.expand_dims(phi_target, 0)
+# phi_target = phi_target[:128,:128]
+# phi_target = np.repeat(phi_target, n_samples, axis=0)
+
+label = one_hot([y] * n_samples, vocab_size)  # NOTE(review): one_hot/imgrid/etc. are not imported in this file
+
+# use z from manifold. NOTE(review): `uploaded` is never assigned in this file -- confirm where it comes from
+if uploaded is not None:
+  z_target = np.repeat(truncated_z_sample(1, truncation, 0), n_samples, axis=0)  # one seed-0 latent, repeated
+  feed_dict = {input_z: z_target, input_y: label, input_trunc: truncation}
+  phi_target = sess.run(output, feed_dict=feed_dict)  # only assigned inside this branch
+
+target_im = imgrid(imconvert_uint8(phi_target), cols=3)
+cost = tf.reduce_sum(tf.pow(output - phi_target, 2))  # sum of squared differences to the target
+dc_dz, = tf.gradients(cost, [input_z])  # gradient of the cost with respect to z
+
+lr = 0.0001  # initial step size for the updates to z_guess
+z_guess = np.asarray(truncated_z_sample(n_samples, truncation/2, 1))  # starting guess, seed 1
+feed_dict = {input_z: z_guess, input_y: label, input_trunc: truncation}
+phi_impostor = sess.run(output, feed_dict=feed_dict)
+impostor_im = imgrid(imconvert_uint8(phi_impostor), cols=3)  # images for the starting guess
+comparison = None
+
+try:
+  for i in range(1000):
+    feed_dict = {input_z: z_guess, input_y: label, input_trunc: truncation}
+    grad = dc_dz.eval(session=sess, feed_dict=feed_dict)
+    z_guess -= grad * lr  # in-place gradient-descent step on z
+
+    # decay/attenuate learning rate to 0.05 of the original over 1000 frames
+    lr *= 0.997
+
+    indices = np.logical_or(z_guess <= -2*truncation, z_guess >= +2*truncation)
+    z_guess[indices] = np.random.randn(np.count_nonzero(indices))  # re-draw entries at/beyond +-2*truncation
+
+    feed_dict = {input_z: z_guess, input_y: label, input_trunc: truncation}
+    phi_guess = sess.run(output, feed_dict=feed_dict)
+    guess_im = imgrid(imconvert_uint8(phi_guess), cols=3)
+
+    imwrite('frames/{:06d}.png'.format(i), guess_im)  # one frame per iteration
+
+    # display the progress every 10 frames
+    if i % 10 == 0:
+      comparison = imgrid(np.asarray([impostor_im, guess_im, target_im]), cols=3, pad=10)
+
+      # clear_output(wait=True)
+      print('lr: {}, iter: {}, grad_std: {}'.format(lr, i, np.std(grad)))
+      imshow(comparison, format='jpeg')
+except KeyboardInterrupt:
+  pass  # Ctrl-C ends the loop without a traceback
+
diff --git a/cli/app/search/util.py b/cli/app/search/util.py
new file mode 100644
index 0000000..a4cdfd9
--- /dev/null
+++ b/cli/app/search/util.py
@@ -0,0 +1,78 @@
+
+def truncated_z_sample(batch_size, truncation=1., seed=None):
+  """Draw a (batch_size, dim_z) sample from a normal truncated to [-2, 2], scaled by truncation."""
+  rng = np.random.RandomState(seed) if seed is not None else None
+  return truncation * truncnorm.rvs(-2, 2, size=(batch_size, dim_z), random_state=rng)
+
+def one_hot(index, vocab_size=vocab_size):
+  """Build a float32 one-hot matrix of shape (num, vocab_size) from a scalar or 1-D index."""
+  idx = np.asarray(index)
+  if idx.ndim == 0:
+    idx = idx.reshape(1)
+  assert idx.ndim == 1
+  encoded = np.zeros((idx.size, vocab_size), dtype=np.float32)
+  encoded[np.arange(idx.size), idx] = 1
+  return encoded
+
+def imconvert_uint8(im):
+  """Rescale a float image from [-1, 1] to uint8, clipping the result to [0, 255]."""
+  scaled = ((im + 1) / 2.0) * 256
+  return np.uint8(np.clip(scaled, 0, 255))
+
+def imconvert_float32(im):
+  """Cast to float32 and rescale so that 0 maps to -1 and 256 would map to +1."""
+  as_float = np.float32(im)
+  return (as_float / 256) * 2.0 - 1
+
+def imread(filename):
+  """Read an image with cv2 (unchanged mode), reversing the last axis of multi-channel images."""
+  loaded = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
+  if loaded is not None and loaded.ndim > 2:
+    loaded = loaded[..., ::-1]
+  return loaded
+  
+def imwrite(filename, img):
+  """Write an image with cv2, reversing the last axis of multi-channel images first."""
+  if img is not None and img.ndim > 2:
+    img = img[..., ::-1]
+  return cv2.imwrite(filename, img)
+
+def imgrid(imarray, cols=5, pad=1):
+  """Tile a uint8 (N, H, W, C) batch into one grid image with `cols` columns.
+
+  Cells are separated by `pad` pixels of value 255; raises ValueError if not uint8.
+  """
+  if imarray.dtype != np.uint8:
+    raise ValueError('imgrid input imarray must be uint8')
+  pad = int(pad)
+  assert pad >= 0
+  cols = int(cols)
+  assert cols >= 1
+  count, height, width, channels = imarray.shape
+  rows = int(np.ceil(count / float(cols)))
+  missing = rows * cols - count
+  assert missing >= 0
+  # Pad only at the end of each axis: blank images, bottom rows, right columns.
+  padding = [[0, missing], [0, pad], [0, pad], [0, 0]]
+  cells = np.pad(imarray, padding, 'constant', constant_values=255)
+  height, width = height + pad, width + pad
+  grid = cells.reshape(rows, cols, height, width, channels)
+  grid = grid.transpose(0, 2, 1, 3, 4).reshape(rows * height, cols * width, channels)
+  # Drop the trailing separator on the bottom and right edges.
+  return grid[:-pad, :-pad] if pad else grid
+
+def imshow(a, format='png', jpeg_fallback=True):
+  """Display image array `a` through IPython; if that raises IOError, retry once as JPEG."""
+  import io  # local import: io.BytesIO exists on Python 2 and 3, cStringIO only on 2
+  a = np.asarray(a, dtype=np.uint8)
+  buf = io.BytesIO()
+  PIL.Image.fromarray(a).save(buf, format)
+  try:
+    return IPython.display.display(IPython.display.Image(buf.getvalue()))
+  except IOError:
+    if not (jpeg_fallback and format != 'jpeg'):
+      raise
+    # Format inside the call: `print (...).format()` breaks under Python 3.
+    print('Warning: image was too large to display in format "{}"; '
+          'trying jpeg instead.'.format(format))
+    return imshow(a, format='jpeg')
author	Jules Laplace <julescarbon@gmail.com>	2019-12-05 17:01:44 +0100
committer	Jules Laplace <julescarbon@gmail.com>	2019-12-05 17:01:44 +0100
commit	d9ceb77e8700312c554cd3205d0c8db775db00c2 (patch)
tree	70c0f0e177a7104988fb8b56698a59fc4886d5aa
parent	1d3c7428068c46568638db5ab547c8aeb2308b57 (diff)