From b39b1d51db2d485e9c60fb4d3f5445474cef8700 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Fri, 14 Dec 2018 16:39:47 +0100 Subject: mysql import functions --- megapixels/app/models/sql_factory.py | 30 +++++++++------ megapixels/commands/faiss/build.py | 62 ------------------------------ megapixels/commands/faiss/build_faiss.py | 58 ++++++++++++++++++++++++++++ megapixels/commands/faiss/sync.py | 18 --------- megapixels/commands/faiss/sync_metadata.py | 18 +++++++++ 5 files changed, 95 insertions(+), 91 deletions(-) delete mode 100644 megapixels/commands/faiss/build.py create mode 100644 megapixels/commands/faiss/build_faiss.py delete mode 100644 megapixels/commands/faiss/sync.py create mode 100644 megapixels/commands/faiss/sync_metadata.py diff --git a/megapixels/app/models/sql_factory.py b/megapixels/app/models/sql_factory.py index 4adc6f48..ecca0c7f 100644 --- a/megapixels/app/models/sql_factory.py +++ b/megapixels/app/models/sql_factory.py @@ -2,8 +2,7 @@ import os from sqlalchemy import create_engine, Table, Column, String, Integer, DateTime, Float from sqlalchemy.orm import sessionmaker -from sqlalchemy.ext.declarative import declarative_base, declared_attr -from sqlalchemy.ext.declarative import AbstractConcreteBase, ConcreteBase +from sqlalchemy.ext.declarative import declarative_base connection_url = "mysql+mysqldb://{}:{}@{}/{}".format( os.getenv("DB_USER"), @@ -12,15 +11,24 @@ connection_url = "mysql+mysqldb://{}:{}@{}/{}".format( os.getenv("DB_NAME") ) -engine = create_engine(connection_url) -Session = sessionmaker(bind=engine) -session = Session() -Base = declarative_base(engine) +# Session = sessionmaker(bind=engine) +# session = Session() + class SqlDataset: - def __init__(self, name): + """ + Bridge between the facial information CSVs connected to the datasets, and MySQL + - each dataset should have files that can be loaded into these database models + - names will be fixed to work in SQL (index -> id) + - we can then have more generic models for fetching this info after doing a FAISS query + """ + def __init__(self, name, base_model=None): self.name = name self.tables = {} + if base_model is None: + engine = create_engine(connection_url) + base_model = declarative_base(engine) + self.base_model = base_model def get_table(self, type): if type in self.tables: @@ -41,7 +49,7 @@ class SqlDataset: # index,uuid # 0,f03fd921-2d56-4e83-8115-f658d6a72287 def uuid_table(self): - class UUID(Base): + class UUID(self.base_model): __tablename__ = self.name + "_uuid" id = Column(Integer, primary_key=True) uuid = Column(String(36), nullable=False) @@ -51,7 +59,7 @@ class SqlDataset: # index,h,image_height,image_index,image_width,w,x,y # 0,0.33000000000000007,250,0,250,0.32999999999999996,0.33666666666666667,0.35 def roi_table(self): - class ROI(Base): + class ROI(self.base_model): __tablename__ = self.name + "_roi" id = Column(Integer, primary_key=True) h = Column(Float, nullable=False) @@ -67,7 +75,7 @@ class SqlDataset: # index,fullname,description,gender,images,image_index # 0,A. J. Cook,Canadian actress,f,1,0 def identity_table(self): - class Identity(Base): + class Identity(self.base_model): __tablename__ = self.name + "_identity" id = Column(Integer, primary_key=True) fullname = Column(String(36), nullable=False) @@ -81,7 +89,7 @@ class SqlDataset: # index,image_index,pitch,roll,yaw # 0,0,11.16264458441435,10.415885631337728,22.99719032415318 def pose_table(self): - class Pose(Base): + class Pose(self.base_model): __tablename__ = self.name + "_pose" id = Column(Integer, primary_key=True) image_id = Column(Integer, primary_key=True) diff --git a/megapixels/commands/faiss/build.py b/megapixels/commands/faiss/build.py deleted file mode 100644 index e525542a..00000000 --- a/megapixels/commands/faiss/build.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -Index all of the FAISS datasets -""" - -import os -import glob -import click -import faiss -import time -import numpy as np - -from app.utils.file_utils import load_recipe, load_csv_safe -from app.settings import app_cfg as cfg - -engine = create_engine('sqlite:///:memory:') - -class DefaultRecipe: - def __init__(self): - self.dim = 128 - self.factory_type = 'Flat' - -@click.command() -@click.pass_context -def cli(ctx): - """build the FAISS index. - - looks for all datasets in faiss/metadata/ - - uses the recipe above by default - - however you can override this by adding a new recipe in faiss/recipes/{name}.json - """ - datasets = [] - for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): - name = os.path.basename(fn) - recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json") - if os.path.exists(recipe_fn): - build_faiss(name, load_recipe(recipe_fn)) - else: - build_faiss(name, DefaultRecipe()) - # index identities - # certain CSV files should be loaded into mysql - # User.__table__.drop() - SQLemployees.create(engine) - -def build_faiss(name, recipe): - vec_fn = os.path.join(cfg.DIR_FAISS_METADATA, name, "vecs.csv") - index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index") - - index = faiss.index_factory(recipe.dim, recipe.factory_type) - - keys, rows = load_csv_safe(vec_fn) - feats = np.array([ list(map(float, row[3].split(","))) for row in rows ]).astype('float32') - n, d = feats.shape - - print("{}: training {} x {} dim vectors".format(name, n, d)) - print(recipe.factory_type) - - add_start = time.time() - index.add(feats) - add_end = time.time() - add_time = add_end - add_start - print("{}: add time: {:.1f}s".format(name, add_time)) - - faiss.write_index(index, index_fn) diff --git a/megapixels/commands/faiss/build_faiss.py b/megapixels/commands/faiss/build_faiss.py new file mode 100644 index 00000000..96d3f99e --- /dev/null +++ b/megapixels/commands/faiss/build_faiss.py @@ -0,0 +1,58 @@ +""" +Index all of the FAISS datasets +""" + +import os +import glob +import click +import faiss +import time +import numpy as np + +from app.utils.file_utils import load_recipe, load_csv_safe +from app.settings import app_cfg as cfg + +engine = create_engine('sqlite:///:memory:') + +class DefaultRecipe: + def __init__(self): + self.dim = 128 + self.factory_type = 'Flat' + +@click.command() +@click.pass_context +def cli(ctx): + """build the FAISS index. + - looks for all datasets in faiss/metadata/ + - uses the recipe above by default + - however you can override this by adding a new recipe in faiss/recipes/{name}.json + """ + datasets = [] + for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): + name = os.path.basename(fn) + recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json") + if os.path.exists(recipe_fn): + build_faiss(name, load_recipe(recipe_fn)) + else: + build_faiss(name, DefaultRecipe()) + +def build_faiss(name, recipe): + vec_fn = os.path.join(cfg.DIR_FAISS_METADATA, name, "vecs.csv") + index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index") + + index = faiss.index_factory(recipe.dim, recipe.factory_type) + + keys, rows = load_csv_safe(vec_fn) + feats = np.array([ list(map(float, row[3].split(","))) for row in rows ]).astype('float32') + n, d = feats.shape + + print("{}: training {} x {} dim vectors".format(name, n, d)) + print(recipe.factory_type) + + add_start = time.time() + index.add(feats) + add_end = time.time() + add_time = add_end - add_start + print("{}: add time: {:.1f}s".format(name, add_time)) + + faiss.write_index(index, index_fn) diff --git a/megapixels/commands/faiss/sync.py b/megapixels/commands/faiss/sync.py deleted file mode 100644 index b01211b4..00000000 --- a/megapixels/commands/faiss/sync.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Sync the FAISS metadata -""" - -import subprocess -import click - -from app.settings import app_cfg as cfg - -@click.command() -@click.pass_context -def cli(ctx): - """synchronize metadata files from s3""" - sts = subprocess.call([ - "s3cmd", "sync", - "s3://megapixels/v1/metadata/", - cfg.DIR_FAISS_METADATA + '/', - ]) diff --git a/megapixels/commands/faiss/sync_metadata.py b/megapixels/commands/faiss/sync_metadata.py new file mode 100644 index 00000000..b01211b4 --- /dev/null +++ b/megapixels/commands/faiss/sync_metadata.py @@ -0,0 +1,18 @@ +""" +Sync the FAISS metadata +""" + +import subprocess +import click + +from app.settings import app_cfg as cfg + +@click.command() +@click.pass_context +def cli(ctx): + """synchronize metadata files from s3""" + sts = subprocess.call([ + "s3cmd", "sync", + "s3://megapixels/v1/metadata/", + cfg.DIR_FAISS_METADATA + '/', + ]) -- cgit v1.2.3-70-g09d2