author    adamhrv <adam@ahprojects.com>  2018-12-15 19:57:49 +0100
committer adamhrv <adam@ahprojects.com>  2018-12-15 19:57:49 +0100
commit    82b2c0b5d6d7baccbe4d574d96e18fe2078047d7 (patch)
tree      a8784b7ec2bc5a0451c252f66a6b786f3a2504f5 /megapixels/commands
parent    8e978af21c2b29f678a09701afb3ec7d65d0a6ab (diff)
parent    c5b02ffab8d388e8a2925e51736b902a48a95e71 (diff)
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels/commands')
-rw-r--r--  megapixels/commands/faiss/build_db.py       15
-rw-r--r--  megapixels/commands/faiss/build_faiss.py    58
-rw-r--r--  megapixels/commands/faiss/sync_metadata.py  18
3 files changed, 91 insertions, 0 deletions
diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py
new file mode 100644
index 00000000..0f979e41
--- /dev/null
+++ b/megapixels/commands/faiss/build_db.py
@@ -0,0 +1,15 @@
+"""
+Load all the CSV files into MySQL
+"""
+
+import click
+
+from app.models.sql_factory import load_sql_datasets
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+ """import the various CSVs into MySQL
+ """
+ print('Loading CSV datasets into SQL...')
+ load_sql_datasets(replace=True)
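
Assuming these modules are registered as subcommands of a click group named after their directory (the entry-point script is not part of this diff, so the invocation below is hypothetical), the new command would be run roughly like:

    $ python megapixels.py faiss build_db
    Loading CSV datasets into SQL...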
diff --git a/megapixels/commands/faiss/build_faiss.py b/megapixels/commands/faiss/build_faiss.py
new file mode 100644
index 00000000..96d3f99e
--- /dev/null
+++ b/megapixels/commands/faiss/build_faiss.py
@@ -0,0 +1,58 @@
+"""
+Index all of the FAISS datasets
+"""
+
+import os
+import glob
+import click
+import faiss
+import time
+import numpy as np
+
+from app.utils.file_utils import load_recipe, load_csv_safe
+from app.settings import app_cfg as cfg
+
+class DefaultRecipe:
+    def __init__(self):
+        self.dim = 128
+        self.factory_type = 'Flat'
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+ """build the FAISS index.
+ - looks for all datasets in faiss/metadata/
+ - uses the recipe above by default
+ - however you can override this by adding a new recipe in faiss/recipes/{name}.json
+ """
+    for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
+        name = os.path.basename(fn)
+        recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json")
+        if os.path.exists(recipe_fn):
+            build_faiss(name, load_recipe(recipe_fn))
+        else:
+            build_faiss(name, DefaultRecipe())
+
+def build_faiss(name, recipe):
+    vec_fn = os.path.join(cfg.DIR_FAISS_METADATA, name, "vecs.csv")
+    index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index")
+
+    index = faiss.index_factory(recipe.dim, recipe.factory_type)
+
+    keys, rows = load_csv_safe(vec_fn)
+    feats = np.array([list(map(float, row[3].split(","))) for row in rows]).astype('float32')
+    n, d = feats.shape
+
+    print("{}: indexing {} x {} dim vectors".format(name, n, d))
+    print(recipe.factory_type)
+
+    if not index.is_trained:
+        index.train(feats)  # non-Flat factory types (e.g. IVF) require training
+
+    add_start = time.time()
+    index.add(feats)
+    add_end = time.time()
+    add_time = add_end - add_start
+    print("{}: add time: {:.1f}s".format(name, add_time))
+
+    faiss.write_index(index, index_fn)
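
A recipe file is assumed to mirror the two fields of DefaultRecipe, since the object returned by load_recipe is read the same way; a minimal sketch of faiss/recipes/{name}.json under that assumption:

    {
      "dim": 128,
      "factory_type": "IVF1024,Flat"
    }

A factory string such as "IVF1024,Flat" produces an index that is not trained at construction time, which is why build_faiss checks index.is_trained before adding vectors.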
diff --git a/megapixels/commands/faiss/sync_metadata.py b/megapixels/commands/faiss/sync_metadata.py
new file mode 100644
index 00000000..b01211b4
--- /dev/null
+++ b/megapixels/commands/faiss/sync_metadata.py
@@ -0,0 +1,18 @@
+"""
+Sync the FAISS metadata
+"""
+
+import subprocess
+import click
+
+from app.settings import app_cfg as cfg
+
+@click.command()
+@click.pass_context
+def cli(ctx):
+ """synchronize metadata files from s3"""
+ sts = subprocess.call([
+ "s3cmd", "sync",
+ "s3://megapixels/v1/metadata/",
+ cfg.DIR_FAISS_METADATA + '/',
+ ])
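
The subprocess call is equivalent to invoking s3cmd directly; with cfg.DIR_FAISS_METADATA resolved to its configured directory (repo-specific, shown here as a placeholder):

    $ s3cmd sync s3://megapixels/v1/metadata/ /path/to/faiss/metadata/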