diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2020-01-07 20:05:57 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2020-01-07 20:05:57 +0100 |
| commit | 08fa371f49042a2989ec3d494f37b8db63b13c95 (patch) | |
| tree | defd020ea3e720e1b58d34c4fb4c16b712cad54c | |
| parent | 45855c051f415e56306f116a921eefe284139694 (diff) | |
script to run everything
| -rw-r--r-- | cli/app/commands/biggan/extract_dense_vectors.py | 49 | ||||
| -rw-r--r-- | cli/app/commands/biggan/fetch.py | 4 | ||||
| -rw-r--r-- | cli/app/commands/biggan/search_class.py | 9 | ||||
| -rw-r--r-- | cli/app/search/json.py | 27 | ||||
| -rw-r--r-- | cli/app/search/search_class.py | 11 | ||||
| -rw-r--r-- | cli/app/search/search_dense.py | 21 | ||||
| -rw-r--r-- | cli/app/utils/cortex_utils.py | 9 |
7 files changed, 90 insertions, 40 deletions
diff --git a/cli/app/commands/biggan/extract_dense_vectors.py b/cli/app/commands/biggan/extract_dense_vectors.py new file mode 100644 index 0000000..2436ce6 --- /dev/null +++ b/cli/app/commands/biggan/extract_dense_vectors.py @@ -0,0 +1,49 @@ +import click +import os + +from app.utils.cortex_utils import fetch_cortex_folder, find_unprocessed_files +from app.search.search_class import find_nearest_vector_for_images +from app.search.search_dense import find_dense_embedding_for_images +from app.search.json import params_dense_dict + +@click.command('') +@click.option('-f', '--folder_id', 'opt_folder_id', type=int, + help='Folder ID to process') +@click.option('-ls', '--latent_steps', 'opt_latent_steps', default=2000, type=int, + help='Number of optimization iterations') +@click.option('-ds', '--dense_steps', 'opt_dense_steps', default=2000, type=int, + help='Number of optimization iterations') +@click.option('-v', '--video', 'opt_video', is_flag=True, + help='Export a video for each dataset') +@click.pass_context +def cli(ctx, opt_folder_id, opt_latent_steps, opt_dense_steps, opt_video): + """ + The full process: + - Fetch new images from the cortex + - Extract labels and base latents + - Extract dense embeddings + - Upload extract images to the cortex + """ + folder = cortex_folder(opt_folder_id) + files = download_cortex_files(opt_folder_id) + unprocessed_files = find_unprocessed_files(files) + if len(unprocessed_files) == 0: + print("All files processed, nothing to do") + return + + print("Processing folder {} ({}), {} new files".format(folder['name'], folder['id'], len(unprocessed_files))) + + tag = "folder_{}".format(folder['id']) + paths = [file['path'] for file in unprocessed_files] + + find_nearest_vector_for_images( + paths=paths, + opt_dims=512, + opt_steps=opt_dense_steps, + opt_video=opt_video, + opt_tag=tag, + opt_limit=-1 + ) + + params = params_dense_dict(tag) + find_dense_embedding_for_images(params) diff --git a/cli/app/commands/biggan/fetch.py b/cli/app/commands/biggan/fetch.py index 39f503c..d454633 100644 --- a/cli/app/commands/biggan/fetch.py +++ b/cli/app/commands/biggan/fetch.py @@ -9,9 +9,9 @@ from app.utils.cortex_utils import fetch_cortex_folder, find_unprocessed_files @click.pass_context def cli(ctx, opt_folder_id): """ - Fetch JSON from the server + Fetch new images from the server """ - files = fetch_cortex_folder(opt_folder_id) + files = download_cortex_files(opt_folder_id) unprocessed_files = find_unprocessed_files(files) print("Unprocessed files:") for file in unprocessed_files: diff --git a/cli/app/commands/biggan/search_class.py b/cli/app/commands/biggan/search_class.py index 0501729..311dc70 100644 --- a/cli/app/commands/biggan/search_class.py +++ b/cli/app/commands/biggan/search_class.py @@ -21,4 +21,11 @@ def cli(ctx, opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag): """ Search for an image (class vector) in BigGAN using gradient descent """ - find_nearest_vector_for_images(opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag) + if os.path.isdir(opt_fp_in): + paths = glob(os.path.join(opt_fp_in, '*.jpg')) + \ + glob(os.path.join(opt_fp_in, '*.jpeg')) + \ + glob(os.path.join(opt_fp_in, '*.png')) + else: + paths = [opt_fp_in] + + find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit) diff --git a/cli/app/search/json.py b/cli/app/search/json.py index beaed3b..b47512d 100644 --- a/cli/app/search/json.py +++ b/cli/app/search/json.py @@ -1,4 +1,5 @@ from os.path import join +from app.settings import app_cfg from app.utils.file_utils import write_json from app.search.params import ParamsDict @@ -10,7 +11,9 @@ def make_params_latent(fp_out_dir, tag): "clip": 1.0, "stochastic_clipping": False, "clipping": False, - "dataset": "inverses/{}/dataset.hdf5".format(tag), + "path": os.path.join(app_cfg.DIR_INVERSES, tag), + "dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.hdf5"), + "out_dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.latent.hdf5"), "inv_layer": "latent", "decay_lr": True, "inv_it": 15000, @@ -41,19 +44,18 @@ def make_params_latent(fp_out_dir, tag): "lambda_reg": 0.1, "dist_loss": False, "sample_size": 4, - "dataset": "inverses/{}/dataset.encodings.hdf5".format(tag), "save_progress": True, } -def params_latent(fp_out_dir, tag): - return ParamsDict(make_params_latent(fp_out_dir, tag)) +def params_latent(tag): + return ParamsDict(make_params_latent(tag)) def save_params_latent(fp_out_dir, tag): - data = make_params_latent(fp_out_dir, tag) + data = make_params_latent(tag) fp_out_fn = join(fp_out_dir, "params_latent.json") write_json(data, fp_out_fn) -def make_params_dense(fp_out_dir, tag, folder_id=None): +def make_params_dense(tag): return { "tag": tag, "folder_id": folder_id, @@ -62,7 +64,8 @@ def make_params_dense(fp_out_dir, tag, folder_id=None): "clip": 1.0, "stochastic_clipping": False, "clipping": False, - "dataset": "inverses/{}/dataset.encodings.hdf5".format(tag), + "path": os.path.join(app_cfg.DIR_INVERSES, tag), + "dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.latent.hdf5"), "inv_layer": "Generator_2/G_Z/Reshape:0", "decay_lr": False, "inv_it": 6000, @@ -93,15 +96,15 @@ def make_params_dense(fp_out_dir, tag, folder_id=None): "lambda_reg": 0.1, "dist_loss": True, "sample_size": 4, - "out_dataset": "inverses/{}/dataset.encodings.dense.hdf5".format(tag), + "out_dataset": os.path.join(app_cfg.DIR_INVERSES, tag, "dataset.dense.hdf5"), "save_progress": True, "max_batches": 0, } -def params_dense_dict(fp_out_dir, tag, folder_id=None): - return ParamsDict(make_params_dense(fp_out_dir, tag, folder_id)) +def params_dense_dict(tag): + return ParamsDict(make_params_dense(tag)) -def save_params_dense(fp_out_dir, tag, folder_id): - data = make_params_dense(fp_out_dir, tag, folder_id) +def save_params_dense(fp_out_dir, tag): + data = make_params_dense(tag) fp_out_fn = join(fp_out_dir, "params_dense.json") write_json(data, fp_out_fn) diff --git a/cli/app/search/search_class.py b/cli/app/search/search_class.py index eba61e8..7eab4bc 100644 --- a/cli/app/search/search_class.py +++ b/cli/app/search/search_class.py @@ -24,23 +24,16 @@ from app.search.image import image_to_uint8, imconvert_uint8, imconvert_float32, from app.search.vector import truncated_z_sample, truncated_z_single, \ create_labels, create_labels_uniform -def find_nearest_vector_for_images(opt_fp_in, opt_dims, opt_steps, opt_limit, opt_video, opt_tag): +def find_nearest_vector_for_images(paths, opt_dims, opt_steps, opt_video, opt_tag, opt_limit=-1): sess = tf.compat.v1.Session() tf.reset_default_graph() generator = hub.Module('https://tfhub.dev/deepmind/biggan-512/2') - if os.path.isdir(opt_fp_in): - paths = glob(os.path.join(opt_fp_in, '*.jpg')) + \ - glob(os.path.join(opt_fp_in, '*.jpeg')) + \ - glob(os.path.join(opt_fp_in, '*.png')) - else: - paths = [opt_fp_in] - fp_inverses = os.path.join(app_cfg.DIR_INVERSES, opt_tag) os.makedirs(fp_inverses, exist_ok=True) save_params_latent(fp_inverses, opt_tag) save_params_dense(fp_inverses, opt_tag) - out_file = h5py.File(join(fp_inverses, 'dataset.hdf5'), 'w') + out_file = h5py.File(join(fp_inverses, 'dataset.latent.hdf5'), 'w') out_images = out_file.create_dataset('xtrain', (len(paths), 3, 512, 512,), dtype='float32') out_labels = out_file.create_dataset('ytrain', (len(paths), 1000,), dtype='float32') out_latent = out_file.create_dataset('latent', (len(paths), 128,), dtype='float32') diff --git a/cli/app/search/search_dense.py b/cli/app/search/search_dense.py index 392fa70..a538d0d 100644 --- a/cli/app/search/search_dense.py +++ b/cli/app/search/search_dense.py @@ -72,15 +72,13 @@ def find_dense_embedding_for_images(params): LATENT_TAG = 'latent' if params.inv_layer == 'latent' else 'dense' BATCH_SIZE = params.batch_size SAMPLE_SIZE = params.sample_size - LOGS_DIR = os.path.join('inverses', params.tag, LATENT_TAG, 'logs') - SAMPLES_DIR = os.path.join('inverses', params.tag, LATENT_TAG, 'samples') - INVERSES_DIR = os.path.join('inverses', params.tag) - if not os.path.exists(LOGS_DIR): - os.makedirs(LOGS_DIR) - if not os.path.exists(SAMPLES_DIR): - os.makedirs(SAMPLES_DIR) - if not os.path.exists(INVERSES_DIR): - os.makedirs(INVERSES_DIR) + LOGS_DIR = os.path.join(params.path, LATENT_TAG, 'logs') + SAMPLES_DIR = os.path.join(params.path, LATENT_TAG, 'samples') + VECTOR_DIR = os.path.join(params.path, 'vectors') + + os.makedirs(LOGS_DIR, exist_ok=True) + os.makedirs(SAMPLES_DIR, exist_ok=True) + os.makedirs(VECTOR_DIR, exist_ok=True) # -------------------------- # Util functions. @@ -410,7 +408,7 @@ def find_dense_embedding_for_images(params): NUM_IMGS_TO_PROCESS = NUM_IMGS # Output file. - out_file = h5py.File(os.path.join(INVERSES_DIR, params.out_dataset), 'w') + out_file = h5py.File(params.out_dataset, 'w') out_images = out_file.create_dataset('xtrain', [NUM_IMGS_TO_PROCESS,] + IMG_SHAPE, dtype='float32') out_enc = out_file.create_dataset('encoding', [NUM_IMGS_TO_PROCESS,] + ENC_SHAPE, dtype='float32') out_lat = out_file.create_dataset('latent', [NUM_IMGS_TO_PROCESS, Z_DIM], dtype='float32') @@ -421,9 +419,6 @@ def find_dense_embedding_for_images(params): out_fns[:] = sample_fns[:NUM_IMGS_TO_PROCESS] - vector_dir = os.path.join(app_cfg.INVERSES_DIR, "vectors") - os.makedirs(vector_dir, exist_ok=True) - # Gradient descent w.r.t. generator's inputs. it = 0 out_pos = 0 diff --git a/cli/app/utils/cortex_utils.py b/cli/app/utils/cortex_utils.py index 8076fda..50e85a4 100644 --- a/cli/app/utils/cortex_utils.py +++ b/cli/app/utils/cortex_utils.py @@ -10,7 +10,10 @@ def api_url(path): """Generate a base API path""" return "https://lens.neural.garden/api/{}/".format(path) -def fetch_cortex_folder(opt_folder_id): +def cortex_folder(opt_folder_id): + return fetch_json(os.path.join(api_url('folder'), str(opt_folder_id) + "/")) + +def download_cortex_files(opt_folder_id): """Fetch all new, non-generated files in a Cortex folder""" rows = fetch_json(api_url('file'), folder_id=opt_folder_id) fp_out_dir = join(app_cfg.DIR_INPUTS, "cortex", str(opt_folder_id)) @@ -31,12 +34,12 @@ def find_unprocessed_files(files): datasets = {} unprocessed_files = [] for file in files: - if file['generated'] == 1: + if file['generated'] == 1 and file['type'] == 'image': fn, ext = os.path.splitext(file['name']) dataset = fn.split('-')[0] datasets[dataset] = file['id'] for file in files: - if file['generated'] == 0 and file['processed'] != 1: + if file['generated'] == 0 and file['processed'] == 0 and file['type'] == 'image': fn, ext = os.path.splitext(file['name']) dataset = fn.split('-')[0] if dataset not in datasets: |
