script to run everything

author: Jules Laplace <julescarbon@gmail.com> 2020-01-07 20:05:57 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2020-01-07 20:05:57 +0100
commit: 08fa371f49042a2989ec3d494f37b8db63b13c95 (patch)
tree: defd020ea3e720e1b58d34c4fb4c16b712cad54c
parent: 45855c051f415e56306f116a921eefe284139694 (diff)
7 files changed, 90 insertions, 40 deletions
diff --git a/cli/app/commands/biggan/extract_dense_vectors.py b/cli/app/commands/biggan/extract_dense_vectors.py
new file mode 100644
index 0000000..2436ce6
--- /dev/null
+++ b/cli/app/commands/biggan/extract_dense_vectors.py
@@ -0,0 +1,49 @@
+import click
+import os
+
+from app.utils.cortex_utils import fetch_cortex_folder, find_unprocessed_files
+from app.search.search_class import find_nearest_vector_for_images
+from app.search.search_dense import find_dense_embedding_for_images
+from app.search.json import params_dense_dict
+
+@click.command('')
+@click.option('-f', '--folder_id', 'opt_folder_id', type=int,
+  help='Folder ID to process')
+@click.option('-ls', '--latent_steps', 'opt_latent_steps', default=2000, type=int,
+  help='Number of optimization iterations')
+@click.option('-ds', '--dense_steps', 'opt_dense_steps', default=2000, type=int,
+  help='Number of optimization iterations')
+@click.option('-v', '--video', 'opt_video', is_flag=True,
+  help='Export a video for each dataset')
+@click.pass_context
+def cli(ctx, opt_folder_id, opt_latent_steps, opt_dense_steps, opt_video):
+  """
+  The full process:
+  - Fetch new images from the cortex
+  - Extract labels and base latents
+  - Extract dense embeddings
+  - Upload extracted images to the cortex
+  """
+  folder = cortex_folder(opt_folder_id)
+  files = download_cortex_files(opt_folder_id)
+  unprocessed_files = find_unprocessed_files(files)
+  if len(unprocessed_files) == 0:
+    print("All files processed, nothing to do")
+    return
+  
+  print("Processing folder {} ({}), {} new files".format(folder['name'], folder['id'], len(unprocessed_files)))
+
+  tag = "folder_{}".format(folder['id'])
+  paths = [file['path'] for file in unprocessed_files]
+  
+  find_nearest_vector_for_images(
+    paths=paths,
+    opt_dims=512,
+    opt_steps=opt_dense_steps,
+    opt_video=opt_video,
+    opt_tag=tag,
+    opt_limit=-1
+  )
+
+  params = params_dense_dict(tag)
+  find_dense_embedding_for_images(params)
diff --git a/cli/app/commands/biggan/fetch.py b/cli/app/commands/biggan/fetch.py
index 39f503c..d454633 100644
--- a/cli/app/commands/biggan/fetch.py
+++ b/cli/app/commands/biggan/fetch.py
@@ -9,9 +9,9 @@ from app.utils.cortex_utils import fetch_cortex_folder, find_unprocessed_files
 @click.pass_context
 def cli(ctx, opt_folder_id):
   """
-  Fetch JSON from the server
+  Fetch new images from the server
   """
-  files = fetch_cortex_folder(opt_folder_id)
+  files = download_cortex_files(opt_folder_id)
   unprocessed_files = find_unprocessed_files(files)
   print("Unprocessed files:")
   for file in unprocessed_files:
diff --git a/cli/app/commands/biggan/search_class.py b/cli/app/commands/biggan/search_class.py
index 0501729..311dc70 100644
--- a/cli/app/commands/biggan/search_class.py
+++ b/cli/app/commands/biggan/search_class.py
@@ -21,4 +21,11 @@ def cli(ctx, opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag):
   """
   Search for an image (class vector) in BigGAN using gradient descent
   """
-  find_nearest_vector_for_images(opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag)
+  if os.path.isdir(opt_fp_in):
+    paths = glob(os.path.join(opt_fp_in, '*.jpg')) + \
+      glob(os.path.join(opt_fp_in, '*.jpeg')) + \
+      glob(os.path.join(opt_fp_in, '*.png'))
+  else:
+    paths = [opt_fp_in]
+
+  find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit)
diff --git a/cli/app/search/json.py b/cli/app/search/json.py
index beaed3b..b47512d 100644
--- a/cli/app/search/json.py
+++ b/cli/app/search/json.py
@@ -1,4 +1,5 @@
 from os.path import join
+from app.settings import app_cfg
 from app.utils.file_utils import write_json
 from app.search.params import ParamsDict
 
@@ -10,7 +11,9 @@ def make_params_latent(fp_out_dir, tag):
     "clip": 1.0, 
     "stochastic_clipping": False, 
     "clipping": False, 
-    "dataset": "inverses/{}/dataset.hdf5".format(tag),
+    "path": os.path.join(app_cfg.DIR_INVERSES, tag),
+    "dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.hdf5"),
+    "out_dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.latent.hdf5"),
     "inv_layer": "latent", 
     "decay_lr": True, 
     "inv_it": 15000, 
@@ -41,19 +44,18 @@ def make_params_latent(fp_out_dir, tag):
     "lambda_reg": 0.1, 
     "dist_loss": False, 
     "sample_size": 4, 
-    "dataset": "inverses/{}/dataset.encodings.hdf5".format(tag),
     "save_progress": True,
   }
 
-def params_latent(fp_out_dir, tag):
-  return ParamsDict(make_params_latent(fp_out_dir, tag))
+def params_latent(tag):
+  return ParamsDict(make_params_latent(tag))
 
 def save_params_latent(fp_out_dir, tag):
-  data = make_params_latent(fp_out_dir, tag)
+  data = make_params_latent(tag)
   fp_out_fn = join(fp_out_dir, "params_latent.json")
   write_json(data, fp_out_fn)
 
-def make_params_dense(fp_out_dir, tag, folder_id=None):
+def make_params_dense(tag):
   return {
     "tag": tag,
     "folder_id": folder_id,
@@ -62,7 +64,8 @@ def make_params_dense(fp_out_dir, tag, folder_id=None):
     "clip": 1.0, 
     "stochastic_clipping": False, 
     "clipping": False, 
-    "dataset": "inverses/{}/dataset.encodings.hdf5".format(tag),
+    "path": os.path.join(app_cfg.DIR_INVERSES, tag),
+    "dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.latent.hdf5"),
     "inv_layer": "Generator_2/G_Z/Reshape:0", 
     "decay_lr": False, 
     "inv_it": 6000, 
@@ -93,15 +96,15 @@ def make_params_dense(fp_out_dir, tag, folder_id=None):
     "lambda_reg": 0.1, 
     "dist_loss": True, 
     "sample_size": 4, 
-    "out_dataset": "inverses/{}/dataset.encodings.dense.hdf5".format(tag),
+    "out_dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.dense.hdf5"),
     "save_progress": True,
     "max_batches": 0,
   }
 
-def params_dense_dict(fp_out_dir, tag, folder_id=None):
-  return ParamsDict(make_params_dense(fp_out_dir, tag, folder_id))
+def params_dense_dict(tag):
+  return ParamsDict(make_params_dense(tag))
 
-def save_params_dense(fp_out_dir, tag, folder_id):
-  data = make_params_dense(fp_out_dir, tag, folder_id)
+def save_params_dense(fp_out_dir, tag):
+  data = make_params_dense(tag)
   fp_out_fn = join(fp_out_dir, "params_dense.json")
   write_json(data, fp_out_fn)
diff --git a/cli/app/search/search_class.py b/cli/app/search/search_class.py
index eba61e8..7eab4bc 100644
--- a/cli/app/search/search_class.py
+++ b/cli/app/search/search_class.py
@@ -24,23 +24,16 @@ from app.search.image import image_to_uint8, imconvert_uint8, imconvert_float32,
 from app.search.vector import truncated_z_sample, truncated_z_single, \
   create_labels, create_labels_uniform
 
-def find_nearest_vector_for_images(opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag):
+def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit=-1):
   sess = tf.compat.v1.Session()
   tf.reset_default_graph()
   generator = hub.Module('https://tfhub.dev/deepmind/biggan-512/2')
 
-  if os.path.isdir(opt_fp_in):
-    paths = glob(os.path.join(opt_fp_in, '*.jpg')) + \
-      glob(os.path.join(opt_fp_in, '*.jpeg')) + \
-      glob(os.path.join(opt_fp_in, '*.png'))
-  else:
-    paths = [opt_fp_in]
-
   fp_inverses = os.path.join(app_cfg.DIR_INVERSES, opt_tag)
   os.makedirs(fp_inverses, exist_ok=True)
   save_params_latent(fp_inverses, opt_tag)
   save_params_dense(fp_inverses, opt_tag)
-  out_file = h5py.File(join(fp_inverses, 'dataset.hdf5'), 'w')
+  out_file = h5py.File(join(fp_inverses, 'dataset.latent.hdf5'), 'w')
   out_images = out_file.create_dataset('xtrain', (len(paths), 3, 512, 512,), dtype='float32')
   out_labels = out_file.create_dataset('ytrain', (len(paths), 1000,), dtype='float32')
   out_latent = out_file.create_dataset('latent', (len(paths), 128,), dtype='float32')
diff --git a/cli/app/search/search_dense.py b/cli/app/search/search_dense.py
index 392fa70..a538d0d 100644
--- a/cli/app/search/search_dense.py
+++ b/cli/app/search/search_dense.py
@@ -72,15 +72,13 @@ def find_dense_embedding_for_images(params):
   LATENT_TAG = 'latent' if params.inv_layer == 'latent' else 'dense'
   BATCH_SIZE = params.batch_size
   SAMPLE_SIZE = params.sample_size
-  LOGS_DIR = os.path.join('inverses', params.tag, LATENT_TAG, 'logs')
-  SAMPLES_DIR = os.path.join('inverses', params.tag, LATENT_TAG, 'samples')
-  INVERSES_DIR = os.path.join('inverses', params.tag)
-  if not os.path.exists(LOGS_DIR):
-    os.makedirs(LOGS_DIR)
-  if not os.path.exists(SAMPLES_DIR):
-    os.makedirs(SAMPLES_DIR)
-  if not os.path.exists(INVERSES_DIR):
-    os.makedirs(INVERSES_DIR)
+  LOGS_DIR = os.path.join(params.path, LATENT_TAG, 'logs')
+  SAMPLES_DIR = os.path.join(params.path, LATENT_TAG, 'samples')
+  VECTOR_DIR = os.path.join(params.path, 'vectors')
+
+  os.makedirs(LOGS_DIR, exist_ok=True)
+  os.makedirs(SAMPLES_DIR, exist_ok=True)
+  os.makedirs(VECTOR_DIR, exist_ok=True)
 
   # --------------------------
   # Util functions.
@@ -410,7 +408,7 @@ def find_dense_embedding_for_images(params):
     NUM_IMGS_TO_PROCESS = NUM_IMGS
 
   # Output file.
-  out_file = h5py.File(os.path.join(INVERSES_DIR, params.out_dataset), 'w')
+  out_file = h5py.File(params.out_dataset, 'w')
   out_images = out_file.create_dataset('xtrain', [NUM_IMGS_TO_PROCESS,] + IMG_SHAPE, dtype='float32')
   out_enc = out_file.create_dataset('encoding', [NUM_IMGS_TO_PROCESS,] + ENC_SHAPE, dtype='float32')
   out_lat = out_file.create_dataset('latent', [NUM_IMGS_TO_PROCESS, Z_DIM], dtype='float32')
@@ -421,9 +419,6 @@ def find_dense_embedding_for_images(params):
 
   out_fns[:] = sample_fns[:NUM_IMGS_TO_PROCESS]
 
-  vector_dir = os.path.join(app_cfg.INVERSES_DIR, "vectors")
-  os.makedirs(vector_dir, exist_ok=True)
-
   # Gradient descent w.r.t. generator's inputs.
   it = 0
   out_pos = 0
diff --git a/cli/app/utils/cortex_utils.py b/cli/app/utils/cortex_utils.py
index 8076fda..50e85a4 100644
--- a/cli/app/utils/cortex_utils.py
+++ b/cli/app/utils/cortex_utils.py
@@ -10,7 +10,10 @@ def api_url(path):
   """Generate a base API path"""
   return "https://lens.neural.garden/api/{}/".format(path)
 
-def fetch_cortex_folder(opt_folder_id):
+def cortex_folder(opt_folder_id):
+  return fetch_json(os.path.join(api_url('folder'), str(opt_folder_id) + "/"))
+
+def download_cortex_files(opt_folder_id):
   """Fetch all new, non-generated files in a Cortex folder"""
   rows = fetch_json(api_url('file'), folder_id=opt_folder_id)
   fp_out_dir = join(app_cfg.DIR_INPUTS, "cortex", str(opt_folder_id))
@@ -31,12 +34,12 @@ def find_unprocessed_files(files):
   datasets = {}
   unprocessed_files = []
   for file in files:
-    if file['generated'] == 1:
+    if file['generated'] == 1 and file['type'] == 'image':
       fn, ext = os.path.splitext(file['name'])
       dataset = fn.split('-')[0]
       datasets[dataset] = file['id']
   for file in files:
-    if file['generated'] == 0 and file['processed'] != 1:
+    if file['generated'] == 0 and file['processed'] == 0 and file['type'] == 'image':
       fn, ext = os.path.splitext(file['name'])
       dataset = fn.split('-')[0]
       if dataset not in datasets:
author	Jules Laplace <julescarbon@gmail.com>	2020-01-07 20:05:57 +0100
committer	Jules Laplace <julescarbon@gmail.com>	2020-01-07 20:05:57 +0100
commit	08fa371f49042a2989ec3d494f37b8db63b13c95 (patch)
tree	defd020ea3e720e1b58d34c4fb4c16b712cad54c
parent	45855c051f415e56306f116a921eefe284139694 (diff)