diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-12-14 15:50:07 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-12-14 15:50:07 +0100 |
| commit | e1fba831b7c22f9840c5e92227f688079b9a206e (patch) | |
| tree | 834e4160b2a9ae54800c55e9d0e42d20f83513f5 /megapixels | |
| parent | 9e7713e83a99d8ca50ffff49def7085bb8f4e09c (diff) | |
mysql import script
Diffstat (limited to 'megapixels')
| -rw-r--r-- | megapixels/app/models/sql_factory.py | 91 | ||||
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 11 | ||||
| -rw-r--r-- | megapixels/commands/faiss/build.py | 58 | ||||
| -rw-r--r-- | megapixels/commands/faiss/build_db.py | 38 | ||||
| -rw-r--r-- | megapixels/commands/faiss/sync.py | 5 |
5 files changed, 179 insertions, 24 deletions
diff --git a/megapixels/app/models/sql_factory.py b/megapixels/app/models/sql_factory.py new file mode 100644 index 00000000..4adc6f48 --- /dev/null +++ b/megapixels/app/models/sql_factory.py @@ -0,0 +1,91 @@ +import os + +from sqlalchemy import create_engine, Table, Column, String, Integer, DateTime, Float +from sqlalchemy.orm import sessionmaker +from sqlalchemy.ext.declarative import declarative_base, declared_attr +from sqlalchemy.ext.declarative import AbstractConcreteBase, ConcreteBase + +connection_url = "mysql+mysqldb://{}:{}@{}/{}".format( + os.getenv("DB_USER"), + os.getenv("DB_PASS"), + os.getenv("DB_HOST"), + os.getenv("DB_NAME") +) + +engine = create_engine(connection_url) +Session = sessionmaker(bind=engine) +session = Session() +Base = declarative_base(engine) + +class SqlDataset: + def __init__(self, name): + self.name = name + self.tables = {} + + def get_table(self, type): + if type in self.tables: + return self.tables[type] + elif type == 'uuid': + self.tables[type] = self.uuid_table() + elif type == 'roi': + self.tables[type] = self.roi_table() + elif type == 'identity_meta': + self.tables[type] = self.identity_table() + elif type == 'pose': + self.tables[type] = self.pose_table() + else: + return None + return self.tables[type] + + # ==> uuids.csv <== + # index,uuid + # 0,f03fd921-2d56-4e83-8115-f658d6a72287 + def uuid_table(self): + class UUID(Base): + __tablename__ = self.name + "_uuid" + id = Column(Integer, primary_key=True) + uuid = Column(String(36), nullable=False) + return UUID + + # ==> roi.csv <== + # index,h,image_height,image_index,image_width,w,x,y + # 0,0.33000000000000007,250,0,250,0.32999999999999996,0.33666666666666667,0.35 + def roi_table(self): + class ROI(Base): + __tablename__ = self.name + "_roi" + id = Column(Integer, primary_key=True) + h = Column(Float, nullable=False) + image_height = Column(Integer, nullable=False) + image_index = Column(Integer, nullable=False) + image_width = Column(Integer, nullable=False) + w = Column(Float, nullable=False) + x = Column(Float, nullable=False) + y = Column(Float, nullable=False) + return ROI + + # ==> identity.csv <== + # index,fullname,description,gender,images,image_index + # 0,A. J. Cook,Canadian actress,f,1,0 + def identity_table(self): + class Identity(Base): + __tablename__ = self.name + "_identity" + id = Column(Integer, primary_key=True) + fullname = Column(String(36), nullable=False) + description = Column(String(36), nullable=False) + gender = Column(String(1), nullable=False) + images = Column(Integer, nullable=False) + image_id = Column(Integer, nullable=False) + return Identity + + # ==> pose.csv <== + # index,image_index,pitch,roll,yaw + # 0,0,11.16264458441435,10.415885631337728,22.99719032415318 + def pose_table(self): + class Pose(Base): + __tablename__ = self.name + "_pose" + id = Column(Integer, primary_key=True) + image_id = Column(Integer, primary_key=True) + pitch = Column(Float, nullable=False) + roll = Column(Float, nullable=False) + yaw = Column(Float, nullable=False) + return Pose diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py index b2876af7..51392bcc 100644 --- a/megapixels/app/settings/app_cfg.py +++ b/megapixels/app/settings/app_cfg.py @@ -2,6 +2,7 @@ import os from os.path import join import logging import collections +from dotenv import load_dotenv import cv2 as cv @@ -42,9 +43,9 @@ DIR_MODELS_DLIB_68PT = join(DIR_MODELS_DLIB, 'shape_predictor_68_face_landmarks. DIR_MODELS_DLIB_FACEREC_RESNET = join(DIR_MODELS_DLIB, 'dlib_face_recognition_resnet_model_v1.dat') DIR_FAISS = join(DIR_APP, 'faiss') -DIR_FAISS_DATASETS = join(DIR_FAISS, 'datasets') DIR_FAISS_INDEXES = join(DIR_FAISS, 'indexes') DIR_FAISS_METADATA = join(DIR_FAISS, 'metadata') +DIR_FAISS_RECIPES = join(DIR_FAISS, 'recipes') # Test images DIR_TEST_IMAGES = join(DIR_APP, 'test', 'images') @@ -62,6 +63,7 @@ FP_FONT = join(DIR_ASSETS, 'font') DIR_COMMANDS_CV = 'commands/cv' DIR_COMMANDS_ADMIN = 'commands/admin' DIR_COMMANDS_DATASETS = 'commands/datasets' +DIR_COMMANDS_FAISS = 'commands/faiss' DIR_COMMANDS_MISC = 'commands/misc' # ----------------------------------------------------------------------------- @@ -109,3 +111,10 @@ LOGFILE_FORMAT = "%(log_color)s%(levelname)-8s%(reset)s %(cyan)s%(filename)s:%(l # ----------------------------------------------------------------------------- S3_MEDIA_ROOT = 's3://megapixels/v1/media/' S3_METADATA_ROOT = 's3://megapixels/v1/metadata/' + +# ----------------------------------------------------------------------------- +# .env config for keys +# ----------------------------------------------------------------------------- + +DIR_DOTENV = join(DIR_APP, '.env') +load_dotenv(dotenv_path=DIR_DOTENV) diff --git a/megapixels/commands/faiss/build.py b/megapixels/commands/faiss/build.py index e95619af..e525542a 100644 --- a/megapixels/commands/faiss/build.py +++ b/megapixels/commands/faiss/build.py @@ -3,44 +3,60 @@ Index all of the FAISS datasets """ import os +import glob import click +import faiss +import time +import numpy as np -from app.utils.file_utils import load_recipe, load_csv +from app.utils.file_utils import load_recipe, load_csv_safe from app.settings import app_cfg as cfg +engine = create_engine('sqlite:///:memory:') + +class DefaultRecipe: + def __init__(self): + self.dim = 128 + self.factory_type = 'Flat' + @click.command() @click.pass_context def cli(ctx): - """train the FAISS index""" - - recipe = { - "dim": 128, - "factory_type": "Flat" - } - + """build the FAISS index. + - looks for all datasets in faiss/metadata/ + - uses the recipe above by default + - however you can override this by adding a new recipe in faiss/recipes/{name}.json + """ datasets = [] - for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_DATASETS, "*")): + for fn in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): name = os.path.basename(fn) recipe_fn = os.path.join(cfg.DIR_FAISS_RECIPES, name + ".json") if os.path.exists(recipe_fn): - train(name, load_recipe(recipe_fn)) + build_faiss(name, load_recipe(recipe_fn)) else: - train(name, recipe) + build_faiss(name, DefaultRecipe()) + # index identities + # certain CSV files should be loaded into mysql + # User.__table__.drop() + SQLemployees.create(engine) -def train(name, recipe): - vec_fn = os.path.join(cfg.DIR_FAISS_DATASETS, name, "vecs.csv") +def build_faiss(name, recipe): + vec_fn = os.path.join(cfg.DIR_FAISS_METADATA, name, "vecs.csv") index_fn = os.path.join(cfg.DIR_FAISS_INDEXES, name + ".index") - index = faiss.index_factory(recipe.dimension, recipe.factory) + index = faiss.index_factory(recipe.dim, recipe.factory_type) - keys, rows = file_utils.load_csv_safe(vec_fn) - feats = np.array([ float(x[1].split(",")) for x in rows]).astype('float32') + keys, rows = load_csv_safe(vec_fn) + feats = np.array([ list(map(float, row[3].split(","))) for row in rows ]).astype('float32') n, d = feats.shape - train_start = time.time() - index.train(feats) - train_end = time.time() - train_time = train_end - train_start - print("{} train time: {:.1f}s".format(name, train_time)) + print("{}: training {} x {} dim vectors".format(name, n, d)) + print(recipe.factory_type) + + add_start = time.time() + index.add(feats) + add_end = time.time() + add_time = add_end - add_start + print("{}: add time: {:.1f}s".format(name, add_time)) faiss.write_index(index, index_fn) diff --git a/megapixels/commands/faiss/build_db.py b/megapixels/commands/faiss/build_db.py new file mode 100644 index 00000000..c90d178b --- /dev/null +++ b/megapixels/commands/faiss/build_db.py @@ -0,0 +1,38 @@ +""" +Load all the CSV files into MySQL +""" + +import os +import glob +import click +import time +import pandas as pd + +from app.models.sql_factory import engine, SqlDataset +from app.utils.file_utils import load_recipe, load_csv_safe +from app.settings import app_cfg as cfg + +@click.command() +@click.pass_context +def cli(ctx): + """import the various CSVs into MySQL + """ + datasets = [] + for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): + build_dataset(path) + +def build_dataset(path): + name = os.path.basename(path) + dataset = SqlDataset(name) + + for fn in glob.iglob(os.path.join(path, "*.csv")): + key = os.path.basename(fn).replace(".csv", "") + table = dataset.get_table(key) + if table is None: + continue + df = pd.read_csv(fn) + + # fix columns that are named "index", a sql reserved word + df.columns = table.__table__.columns.keys() + + df.to_sql(name=table.__tablename__, con=engine, if_exists='replace', index=False) diff --git a/megapixels/commands/faiss/sync.py b/megapixels/commands/faiss/sync.py index ae13b948..b01211b4 100644 --- a/megapixels/commands/faiss/sync.py +++ b/megapixels/commands/faiss/sync.py @@ -10,8 +10,9 @@ from app.settings import app_cfg as cfg @click.command() @click.pass_context def cli(ctx): - sts = call([ + """synchronize metadata files from s3""" + sts = subprocess.call([ "s3cmd", "sync", "s3://megapixels/v1/metadata/", - cfg.DIR_FAISS_METADATA, + cfg.DIR_FAISS_METADATA + '/', ]) |
