diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-12-14 02:31:56 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-12-14 02:31:56 +0100 |
| commit | 9e7713e83a99d8ca50ffff49def7085bb8f4e09c (patch) | |
| tree | f10311db0044d922b937388ad20fcc2f4f14dac0 /megapixels | |
| parent | 2085381043e95d88704ede0fc92a5f129bde466d (diff) | |
faiss cli lib
Diffstat (limited to 'megapixels')
| -rw-r--r-- | megapixels/cli_faiss.py | 36 | ||||
| -rw-r--r-- | megapixels/commands/faiss/build.py | 46 | ||||
| -rw-r--r-- | megapixels/commands/faiss/sync.py | 17 |
3 files changed, 99 insertions, 0 deletions
```diff
diff --git a/megapixels/cli_faiss.py b/megapixels/cli_faiss.py
new file mode 100644
index 00000000..9953d9b7
--- /dev/null
+++ b/megapixels/cli_faiss.py
@@ -0,0 +1,36 @@
+# --------------------------------------------------------
+# add/edit commands in commands/faiss directory
+# --------------------------------------------------------
+
+import click
+
+from app.settings import app_cfg as cfg
+from app.utils import logger_utils
+from app.models.click_factory import ClickSimple
+
+# click cli factory
+cc = ClickSimple.create(cfg.DIR_COMMANDS_FAISS)
+
+# --------------------------------------------------------
+# CLI
+# --------------------------------------------------------
+@click.group(cls=cc, chain=False)
+@click.option('-v', '--verbose', 'verbosity', count=True, default=4,
+    show_default=True,
+    help='Verbosity: -v DEBUG, -vv INFO, -vvv WARN, -vvvv ERROR, -vvvvv CRITICAL')
+@click.pass_context
+def cli(ctx, **kwargs):
+    """\033[1m\033[94mMegaPixels: FAISS Data Scripts\033[0m
+    """
+    ctx.opts = {}
+    # init logger
+    logger_utils.Logger.create(verbosity=kwargs['verbosity'])
+
+
+
+# --------------------------------------------------------
+# Entrypoint
+# --------------------------------------------------------
+if __name__ == '__main__':
+    cli()
+
diff --git a/megapixels/commands/faiss/build.py b/megapixels/commands/faiss/build.py
new file mode 100644
index 00000000..e95619af
--- /dev/null
+++ b/megapixels/commands/faiss/build.py
@@ -0,0 +1,46 @@
+"""
+Index all of the FAISS datasets
+"""
+
+import os
+import click
+
+from app.utils.file_utils import load_recipe, load_csv
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+    """train the FAISS index"""
+
+    recipe = {
+        "dim": 128,
+        "factory_type": "Flat"
+    }
+
+    datasets = []
+    for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_DATASETS, "*")):
+        name = os.path.basename(fn)
+        recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json")
+        if os.path.exists(recipe_fn):
+            train(name, load_recipe(recipe_fn))
+        else:
+            train(name, recipe)
+
+def train(name, recipe):
+    vec_fn = os.path.join(cfg.DIR_FAISS_DATASETS, name, "vecs.csv")
+    index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index")
+
+    index = faiss.index_factory(recipe.dimension, recipe.factory)
+
+    keys, rows = file_utils.load_csv_safe(vec_fn)
+    feats = np.array([ float(x[1].split(",")) for x in rows]).astype('float32')
+    n, d = feats.shape
+
+    train_start = time.time()
+    index.train(feats)
+    train_end = time.time()
+    train_time = train_end - train_start
+    print("{} train time: {:.1f}s".format(name, train_time))
+
+    faiss.write_index(index, index_fn)
diff --git a/megapixels/commands/faiss/sync.py b/megapixels/commands/faiss/sync.py
new file mode 100644
index 00000000..ae13b948
--- /dev/null
+++ b/megapixels/commands/faiss/sync.py
@@ -0,0 +1,17 @@
+"""
+Sync the FAISS metadata
+"""
+
+import subprocess
+import click
+
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+    sts = call([
+        "s3cmd", "sync",
+        "s3://megapixels/v1/metadata/",
+        cfg.DIR_FAISS_METADATA,
+    ])
```
