diff options
| author | adamhrv <adam@ahprojects.com> | 2018-12-15 19:57:49 +0100 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2018-12-15 19:57:49 +0100 |
| commit | 82b2c0b5d6d7baccbe4d574d96e18fe2078047d7 (patch) | |
| tree | a8784b7ec2bc5a0451c252f66a6b786f3a2504f5 /megapixels/commands | |
| parent | 8e978af21c2b29f678a09701afb3ec7d65d0a6ab (diff) | |
| parent | c5b02ffab8d388e8a2925e51736b902a48a95e71 (diff) | |
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels/commands')
| -rw-r--r-- | megapixels/commands/faiss/build_db.py | 15 | ||||
| -rw-r--r-- | megapixels/commands/faiss/build_faiss.py | 67 | ||||
| -rw-r--r-- | megapixels/commands/faiss/sync_metadata.py | 21 |
3 files changed, 103 insertions, 0 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
new file mode 100644
--- /dev/null
+++ b/megapixels/commands/faiss/build_db.py
@@ -0,0 +1,15 @@
+"""
+Load all the CSV files into MySQL
+"""
+
+import click
+
+from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+    """import the various CSVs into MySQL
+    """
+    print('Loading CSV datasets into SQL...')
+    load_sql_datasets(replace=True)
diff --git a/megapixels/commands/faiss/build_faiss.py b/megapixels/commands/faiss/build_faiss.py
new file mode 100644
--- /dev/null
+++ b/megapixels/commands/faiss/build_faiss.py
@@ -0,0 +1,67 @@
+"""
+Index all of the FAISS datasets
+"""
+
+import os
+import glob
+import click
+import faiss
+import time
+import numpy as np
+
+from app.utils.file_utils import load_recipe, load_csv_safe
+from app.settings import app_cfg as cfg
+
+class DefaultRecipe:
+    """Fallback recipe used when a dataset has no JSON recipe of its own."""
+    def __init__(self):
+        self.dim = 128
+        self.factory_type = 'Flat'
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+    """build the FAISS index.
+    - looks for all datasets in faiss/metadata/
+    - uses the recipe above by default
+    - however you can override this by adding a new recipe in faiss/recipes/{name}.json
+    """
+    for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
+        name = os.path.basename(fn)
+        recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json")
+        if os.path.exists(recipe_fn):
+            build_faiss(name, load_recipe(recipe_fn))
+        else:
+            build_faiss(name, DefaultRecipe())
+
+def build_faiss(name, recipe):
+    """Build the FAISS index for one dataset and write it to disk.
+
+    name -- dataset directory name under cfg.DIR_FAISS_METADATA
+    recipe -- object exposing `dim` and `factory_type` for faiss.index_factory
+    """
+    vec_fn = os.path.join(cfg.DIR_FAISS_METADATA, name, "vecs.csv")
+    index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index")
+
+    index = faiss.index_factory(recipe.dim, recipe.factory_type)
+
+    # only the rows are needed; the fourth column holds the comma-separated vector
+    _, rows = load_csv_safe(vec_fn)
+    feats = np.array([ list(map(float, row[3].split(","))) for row in rows ]).astype('float32')
+    n, d = feats.shape
+
+    print("{}: training {} x {} dim vectors".format(name, n, d))
+    print(recipe.factory_type)
+
+    # 'Flat' needs no training, but a recipe may select a factory type that
+    # must be trained before vectors can be added
+    if not index.is_trained:
+        index.train(feats)
+
+    add_start = time.time()
+    index.add(feats)
+    add_end = time.time()
+    add_time = add_end - add_start
+    print("{}: add time: {:.1f}s".format(name, add_time))
+
+    faiss.write_index(index, index_fn)
diff --git a/megapixels/commands/faiss/sync_metadata.py b/megapixels/commands/faiss/sync_metadata.py
new file mode 100644
--- /dev/null
+++ b/megapixels/commands/faiss/sync_metadata.py
@@ -0,0 +1,21 @@
+"""
+Sync the FAISS metadata
+"""
+
+import subprocess
+import click
+
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+    """synchronize metadata files from s3"""
+    sts = subprocess.call([
+        "s3cmd", "sync",
+        "s3://megapixels/v1/metadata/",
+        cfg.DIR_FAISS_METADATA + '/',
+    ])
+    # report a failed sync through the command's exit status instead of dropping it
+    if sts != 0:
+        ctx.exit(sts)
