author    adamhrv <adam@ahprojects.com>  2018-12-18 01:15:48 +0100
committer adamhrv <adam@ahprojects.com>  2018-12-18 01:15:48 +0100
commit    162246a0f1931428c85ab9a31ba42de9ef34dae3 (patch)
tree      a7a80b9836025400269fcb7623a934deb37bbc39 /megapixels
parent    994d74feae29f2577bc04e10dd4bafbfb3dc8e83 (diff)
parent    bf3dd1399e4ef1db5fb8830004827fe603f73b2e (diff)
Merge branch 'master' of github.com:adamhrv/megapixels_dev
Diffstat (limited to 'megapixels')
-rw-r--r--  megapixels/app/models/sql_factory.py |  33
-rw-r--r--  megapixels/app/server/api.py         | 105
-rw-r--r--  megapixels/app/settings/app_cfg.py   |   4
-rw-r--r--  megapixels/app/site/parser.py        |   2
4 files changed, 92 insertions(+), 52 deletions(-)
diff --git a/megapixels/app/models/sql_factory.py b/megapixels/app/models/sql_factory.py
index 9a44941b..da95b539 100644
--- a/megapixels/app/models/sql_factory.py
+++ b/megapixels/app/models/sql_factory.py
@@ -10,7 +10,7 @@ from sqlalchemy.ext.declarative import declarative_base
from app.utils.file_utils import load_recipe, load_csv_safe
from app.settings import app_cfg as cfg
-connection_url = "mysql+mysqldb://{}:{}@{}/{}".format(
+connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
os.getenv("DB_USER"),
os.getenv("DB_PASS"),
os.getenv("DB_HOST"),
@@ -35,7 +35,12 @@ def load_sql_datasets(replace=False, base_model=None):
global datasets, loaded, Session
if loaded:
return datasets
- engine = create_engine(connection_url)
+ engine = create_engine(connection_url, encoding="utf-8")
+ # db.set_character_set('utf8')
+ # dbc = db.cursor()
+ # dbc.execute('SET NAMES utf8;')
+ # dbc.execute('SET CHARACTER SET utf8;')
+ # dbc.execute('SET character_set_connection=utf8;')
Session = sessionmaker(bind=engine)
for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")):
dataset = load_sql_dataset(path, replace, engine, base_model)
@@ -92,13 +97,27 @@ class SqlDataset:
'pose': self.select('pose', id),
}
+ def search_name(self, q):
+ table = self.get_table('identity_meta')
+ uuid_table = self.get_table('uuids')
+
+ identity = table.query.filter(table.fullname.like(q)).order_by(table.fullname.desc()).limit(30)
+ identities = []
+ for row in identity:
+ uuid = uuid_table.query.filter(uuid_table.id == row.image_id).first()
+ identities.append({
+ 'uuid': uuid.toJSON(),
+ 'identity': row.toJSON(),
+ })
+ return identities
+
def select(self, table, id):
table = self.get_table(table)
if not table:
return None
session = Session()
# for obj in session.query(table).filter_by(id=id):
- print(table)
+ # print(table)
obj = session.query(table).filter(table.id == id).first()
session.close()
return obj.toJSON()
@@ -125,7 +144,7 @@ class SqlDataset:
class UUID(self.base_model):
__tablename__ = self.name + "_uuid"
id = Column(Integer, primary_key=True)
- uuid = Column(String(36), nullable=False)
+ uuid = Column(String(36, convert_unicode=True), nullable=False)
def toJSON(self):
return {
'id': self.id,
@@ -167,9 +186,9 @@ class SqlDataset:
class Identity(self.base_model):
__tablename__ = self.name + "_identity"
id = Column(Integer, primary_key=True)
- fullname = Column(String(36), nullable=False)
- description = Column(String(36), nullable=False)
- gender = Column(String(1), nullable=False)
+ fullname = Column(String(36, convert_unicode=True), nullable=False)
+ description = Column(String(36, convert_unicode=True), nullable=False)
+ gender = Column(String(1, convert_unicode=True), nullable=False)
images = Column(Integer, nullable=False)
image_id = Column(Integer, nullable=False)
def toJSON(self):
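Note: the change above swaps mysqldb for mysql-connector-python and forces utf8mb4 so 4-byte Unicode (emoji, many CJK extensions) round-trips through MySQL, whose legacy "utf8" charset is really 3-byte utf8mb3. A minimal sketch of the same connection setup in isolation, assuming mysql-connector-python and SQLAlchemy are installed; DB_NAME is a hypothetical stand-in for the database env var, which falls outside the hunk shown:

    import os
    from sqlalchemy import create_engine, text

    # utf8mb4 is MySQL's full-range UTF-8; the legacy "utf8" charset
    # (utf8mb3) cannot store 4-byte code points such as emoji
    connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
        os.getenv("DB_USER"),
        os.getenv("DB_PASS"),
        os.getenv("DB_HOST"),
        os.getenv("DB_NAME"),  # hypothetical name; the real env var is outside this hunk
    )
    engine = create_engine(connection_url)
    with engine.connect() as conn:
        print(conn.execute(text("SELECT 'héllo'")).scalar())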
diff --git a/megapixels/app/server/api.py b/megapixels/app/server/api.py
index bc60118c..35862837 100644
--- a/megapixels/app/server/api.py
+++ b/megapixels/app/server/api.py
@@ -15,68 +15,84 @@ from app.utils.im_utils import pil2np
sanitize_re = re.compile('[\W]+')
valid_exts = ['.gif', '.jpg', '.jpeg', '.png']
+LIMIT = 9
+THRESHOLD = 0.3
+
api = Blueprint('api', __name__)
faiss_datasets = load_faiss_databases()
@api.route('/')
def index():
+ """List the datasets and their fields"""
return jsonify({ 'datasets': list_datasets() })
-@api.route('/dataset/<name>')
-def show(name):
- dataset = get_dataset(name)
+
+@api.route('/dataset/<dataset_name>')
+def show(dataset_name):
+ """Show the data that a dataset will return"""
+ dataset = get_dataset(dataset_name)
if dataset:
return jsonify(dataset.describe())
else:
return jsonify({ 'status': 404 })
-@api.route('/dataset/<name>/face/', methods=['POST'])
-def upload(name):
+
+@api.route('/dataset/<dataset_name>/face', methods=['POST'])
+def upload(dataset_name):
+ """Query an image against FAISS and return the matching identities"""
start = time.time()
- dataset = get_dataset(name)
- if name not in faiss_datasets:
+ dataset = get_dataset(dataset_name)
+ if dataset_name not in faiss_datasets:
return jsonify({
'error': 'invalid dataset'
})
- faiss_dataset = faiss_datasets[name]
+ faiss_dataset = faiss_datasets[dataset_name]
file = request.files['query_img']
fn = file.filename
if fn.endswith('blob'):
fn = 'filename.jpg'
basename, ext = os.path.splitext(fn)
- print("got {}, type {}".format(basename, ext))
+ # print("got {}, type {}".format(basename, ext))
if ext.lower() not in valid_exts:
return jsonify({ 'error': 'not an image' })
im = Image.open(file.stream).convert('RGB')
im_np = pil2np(im)
-
+
# Face detection
detector = face_detector.DetectorDLIBHOG()
# get detection as BBox object
bboxes = detector.detect(im_np, largest=True)
- if not len(bboxes):
+ if not bboxes or not len(bboxes):
return jsonify({
'error': 'bbox'
})
bbox = bboxes[0]
+ if not bbox:
+ return jsonify({
+ 'error': 'bbox'
+ })
+
dim = im_np.shape[:2][::-1]
bbox = bbox.to_dim(dim) # convert back to real dimensions
+ # print("got bbox")
+ if not bbox:
+ return jsonify({
+ 'error': 'bbox'
+ })
- # face recognition/vector
+ # extract 128-D vector
recognition = face_recognition.RecognitionDLIB(gpu=-1)
vec = recognition.vec(im_np, bbox)
-
- # print(vec)
query = np.array([ vec ]).astype('float32')
- # query FAISS!
- distances, indexes = faiss_dataset.search(query, 10)
+ # query FAISS
+ distances, indexes = faiss_dataset.search(query, LIMIT)
- if len(indexes) == 0:
+ if len(indexes) == 0 or len(indexes[0]) == 0:
return jsonify({
'error': 'nomatch'
})
@@ -85,48 +101,51 @@ def upload(name):
distances = distances[0]
indexes = indexes[0]
- if len(indexes) == 0:
- return jsonify({
- 'error': 'nomatch'
- })
-
- lookup = {}
- ids = [i+1 for i in indexes]
+ dists = []
+ ids = []
for _d, _i in zip(distances, indexes):
- lookup[_i+1] = _d
+ if _d <= THRESHOLD:
+ dists.append(round(float(_d), 2))
+ ids.append(_i+1)
+
+ results = [ dataset.get_identity(int(_i)) for _i in ids ]
- print(distances)
- print(indexes)
+ # print(distances)
+ # print(ids)
- # with the result we have an ID
- # query the sql dataset for the UUID etc here
+ # 'bbox': str(bboxes[0]),
+ # 'bbox_dim': str(bbox),
+ # print(bboxes[0])
+ # print(bbox)
query = {
- 'timing': time.time() - start,
+ 'timing': round(time.time() - start, 3),
+ 'bbox': str(bbox),
}
- results = [ dataset.get_identity(id) for id in ids ]
-
- print(results)
+ # print(results)
return jsonify({
+ 'query': query,
'results': results,
- 'distances': distances.tolist(),
- 'indexes': indexes.tolist(),
+ 'distances': dists,
})
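Note: LIMIT and THRESHOLD now cap the FAISS search at nine neighbours and drop anything beyond an L2 distance of 0.3, and the +1 realigns FAISS's 0-based row ids with the 1-based SQL ids. A self-contained sketch of that search-then-filter step, using a random IndexFlatL2 as a stand-in for the real face index:

    import faiss
    import numpy as np

    LIMIT = 9
    THRESHOLD = 0.3

    # Stand-in index: 1000 random 128-D vectors (dlib face descriptors are 128-D)
    index = faiss.IndexFlatL2(128)
    index.add(np.random.rand(1000, 128).astype('float32'))

    query = np.random.rand(1, 128).astype('float32')
    distances, indexes = index.search(query, LIMIT)

    # Keep only sufficiently close matches; +1 maps 0-based FAISS rows
    # to the 1-based SQL primary keys
    dists = [round(float(d), 2) for d in distances[0] if d <= THRESHOLD]
    ids = [int(i) + 1 for d, i in zip(distances[0], indexes[0]) if d <= THRESHOLD]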
-@api.route('/dataset/<name>/name', methods=['GET'])
-def name_lookup(dataset):
+
+@api.route('/dataset/<dataset_name>/name', methods=['GET','POST'])
+def name_lookup(dataset_name):
+ """Find a name in the dataset"""
start = time.time()
- dataset = get_dataset(name)
+ dataset = get_dataset(dataset_name)
- # we have a query from the request query string...
- # use this to do a like* query on the identities_meta table
+ q = request.args.get('q')
+ # print(q)
query = {
+ 'q': q,
'timing': time.time() - start,
}
- results = []
-
- print(results)
+ results = dataset.search_name(q + '%') if q else None
+
+ # print(results)
return jsonify({
'query': query,
'results': results,
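Note: the route builds a prefix match by appending '%' to the request parameter before handing it to SQLAlchemy's like(), which search_name() in sql_factory.py then runs against identity_meta. A self-contained sketch of the same pattern against SQLite, with Identity as a stand-in for the per-dataset mapped class:

    from sqlalchemy import create_engine, Column, Integer, String
    from sqlalchemy.orm import sessionmaker
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    class Identity(Base):
        __tablename__ = 'identity'
        id = Column(Integer, primary_key=True)
        fullname = Column(String(64), nullable=False)

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add_all([Identity(fullname='Ada Lovelace'),
                     Identity(fullname='Alan Turing')])
    session.commit()

    q = 'Ada'
    rows = (session.query(Identity)
            .filter(Identity.fullname.like(q + '%'))  # prefix match, as above
            .order_by(Identity.fullname.desc())
            .limit(30)
            .all())
    print([r.fullname for r in rows])  # ['Ada Lovelace']
    session.close()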
diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py
index 0c28b315..55fed166 100644
--- a/megapixels/app/settings/app_cfg.py
+++ b/megapixels/app/settings/app_cfg.py
@@ -7,6 +7,8 @@ from dotenv import load_dotenv
from app.settings import types
from app.utils import click_utils
+import codecs
+codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else None)
# -----------------------------------------------------------------------------
# Enum lists used for custom Click Params
@@ -87,7 +89,7 @@ CKPT_ZERO_PADDING = 9
HASH_TREE_DEPTH = 3
HASH_BRANCH_SIZE = 3
-DLIB_FACEREC_JITTERS = 25 # number of face recognition jitters
+DLIB_FACEREC_JITTERS = 5 # number of face recognition jitters
DLIB_FACEREC_PADDING = 0.25 # default dlib
POSE_MINMAX_YAW = (-25,25)
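Two notes on the config changes: dropping DLIB_FACEREC_JITTERS from 25 to 5 trades a little descriptor stability for speed, since dlib averages the embedding over N jittered crops of the face; and the codecs.register() line works around Python not knowing 'utf8mb4' as a codec name, because it is a MySQL charset label that some drivers pass straight to Python's codec machinery. The alias in isolation:

    import codecs

    # Python has no 'utf8mb4' codec; redirect lookups for it to built-in UTF-8
    codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else None)
    print('héllo'.encode('utf8mb4'))  # b'h\xc3\xa9llo'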
diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py
index ecfae0cb..b3d3a8c2 100644
--- a/megapixels/app/site/parser.py
+++ b/megapixels/app/site/parser.py
@@ -64,7 +64,7 @@ def format_applet(section, s3_path):
else:
command = payload[0]
opt = None
- if command == 'python':
+ if command == 'python' or command == 'javascript' or command == 'code':
return format_section([ section ], s3_path)
applet['command'] = command
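Note: the widened check routes javascript and generic code payloads through the same plain-section path as python. An equivalent, more compact spelling (behavior-identical sketch):

    if command in ('python', 'javascript', 'code'):
        return format_section([ section ], s3_path)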