diff options
Diffstat (limited to 'megapixels')
| -rw-r--r-- | megapixels/app/models/sql_factory.py | 33 | ||||
| -rw-r--r-- | megapixels/app/server/api.py | 105 | ||||
| -rw-r--r-- | megapixels/app/settings/app_cfg.py | 4 | ||||
| -rw-r--r-- | megapixels/app/site/parser.py | 2 |
4 files changed, 92 insertions, 52 deletions
diff --git a/megapixels/app/models/sql_factory.py b/megapixels/app/models/sql_factory.py index 9a44941b..da95b539 100644 --- a/megapixels/app/models/sql_factory.py +++ b/megapixels/app/models/sql_factory.py @@ -10,7 +10,7 @@ from sqlalchemy.ext.declarative import declarative_base from app.utils.file_utils import load_recipe, load_csv_safe from app.settings import app_cfg as cfg -connection_url = "mysql+mysqldb://{}:{}@{}/{}".format( +connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( os.getenv("DB_USER"), os.getenv("DB_PASS"), os.getenv("DB_HOST"), @@ -35,7 +35,12 @@ def load_sql_datasets(replace=False, base_model=None): global datasets, loaded, Session if loaded: return datasets - engine = create_engine(connection_url) + engine = create_engine(connection_url, encoding="utf-8") + # db.set_character_set('utf8') + # dbc = db.cursor() + # dbc.execute('SET NAMES utf8;') + # dbc.execute('SET CHARACTER SET utf8;') + # dbc.execute('SET character_set_connection=utf8;') Session = sessionmaker(bind=engine) for path in glob.iglob(os.path.join(cfg.DIR_FAISS_METADATA, "*")): dataset = load_sql_dataset(path, replace, engine, base_model) @@ -92,13 +97,27 @@ class SqlDataset: 'pose': self.select('pose', id), } + def search_name(self, q): + table = self.get_table('identity_meta') + uuid_table = self.get_table('uuids') + + identity = table.query.filter(table.fullname.like(q)).order_by(table.fullname.desc()).limit(30) + identities = [] + for row in identity: + uuid = uuid_table.query.filter(uuid_table.id == row.image_id).first() + identities.append({ + 'uuid': uuid.toJSON(), + 'identity': row.toJSON(), + }) + return identities + def select(self, table, id): table = self.get_table(table) if not table: return None session = Session() # for obj in session.query(table).filter_by(id=id): - print(table) + # print(table) obj = session.query(table).filter(table.id == id).first() session.close() return obj.toJSON() @@ -125,7 +144,7 @@ class SqlDataset: class UUID(self.base_model): __tablename__ = self.name + "_uuid" id = Column(Integer, primary_key=True) - uuid = Column(String(36), nullable=False) + uuid = Column(String(36, convert_unicode=True), nullable=False) def toJSON(self): return { 'id': self.id, @@ -167,9 +186,9 @@ class SqlDataset: class Identity(self.base_model): __tablename__ = self.name + "_identity" id = Column(Integer, primary_key=True) - fullname = Column(String(36), nullable=False) - description = Column(String(36), nullable=False) - gender = Column(String(1), nullable=False) + fullname = Column(String(36, convert_unicode=True), nullable=False) + description = Column(String(36, convert_unicode=True), nullable=False) + gender = Column(String(1, convert_unicode=True), nullable=False) images = Column(Integer, nullable=False) image_id = Column(Integer, nullable=False) def toJSON(self): diff --git a/megapixels/app/server/api.py b/megapixels/app/server/api.py index bc60118c..35862837 100644 --- a/megapixels/app/server/api.py +++ b/megapixels/app/server/api.py @@ -15,68 +15,84 @@ from app.utils.im_utils import pil2np sanitize_re = re.compile('[\W]+') valid_exts = ['.gif', '.jpg', '.jpeg', '.png'] +LIMIT = 9 +THRESHOLD = 0.3 + api = Blueprint('api', __name__) faiss_datasets = load_faiss_databases() @api.route('/') def index(): + """List the datasets and their fields""" return jsonify({ 'datasets': list_datasets() }) -@api.route('/dataset/<name>') -def show(name): - dataset = get_dataset(name) + +@api.route('/dataset/<dataset_name>') +def show(dataset_name): + """Show the data that a dataset will return""" + dataset = get_dataset(dataset_name) if dataset: return jsonify(dataset.describe()) else: return jsonify({ 'status': 404 }) -@api.route('/dataset/<name>/face/', methods=['POST']) -def upload(name): + +@api.route('/dataset/<dataset_name>/face', methods=['POST']) +def upload(dataset_name): + """Query an image against FAISS and return the matching identities""" start = time.time() - dataset = get_dataset(name) - if name not in faiss_datasets: + dataset = get_dataset(dataset_name) + if dataset_name not in faiss_datasets: return jsonify({ 'error': 'invalid dataset' }) - faiss_dataset = faiss_datasets[name] + faiss_dataset = faiss_datasets[dataset_name] file = request.files['query_img'] fn = file.filename if fn.endswith('blob'): fn = 'filename.jpg' basename, ext = os.path.splitext(fn) - print("got {}, type {}".format(basename, ext)) + # print("got {}, type {}".format(basename, ext)) if ext.lower() not in valid_exts: return jsonify({ 'error': 'not an image' }) im = Image.open(file.stream).convert('RGB') im_np = pil2np(im) - + # Face detection detector = face_detector.DetectorDLIBHOG() # get detection as BBox object bboxes = detector.detect(im_np, largest=True) - if not len(bboxes): + if not bboxes or not len(bboxes): return jsonify({ 'error': 'bbox' }) bbox = bboxes[0] + if not bbox: + return jsonify({ + 'error': 'bbox' + }) + dim = im_np.shape[:2][::-1] bbox = bbox.to_dim(dim) # convert back to real dimensions + # print("got bbox") + if not bbox: + return jsonify({ + 'error': 'bbox' + }) - # face recognition/vector + # extract 128-D vector recognition = face_recognition.RecognitionDLIB(gpu=-1) vec = recognition.vec(im_np, bbox) - - # print(vec) query = np.array([ vec ]).astype('float32') - # query FAISS! - distances, indexes = faiss_dataset.search(query, 10) + # query FAISS + distances, indexes = faiss_dataset.search(query, LIMIT) - if len(indexes) == 0: + if len(indexes) == 0 or len(indexes[0]) == 0: return jsonify({ 'error': 'nomatch' }) @@ -85,48 +101,51 @@ def upload(name): distances = distances[0] indexes = indexes[0] - if len(indexes) == 0: - return jsonify({ - 'error': 'nomatch' - }) - - lookup = {} - ids = [i+1 for i in indexes] + dists = [] + ids = [] for _d, _i in zip(distances, indexes): - lookup[_i+1] = _d + if _d <= THRESHOLD: + dists.append(round(float(_d), 2)) + ids.append(_i+1) + + results = [ dataset.get_identity(int(_i)) for _i in ids ] - print(distances) - print(indexes) + # print(distances) + # print(ids) - # with the result we have an ID - # query the sql dataset for the UUID etc here + # 'bbox': str(bboxes[0]), + # 'bbox_dim': str(bbox), + # print(bboxes[0]) + # print(bbox) query = { - 'timing': time.time() - start, + 'timing': round(time.time() - start, 3), + 'bbox': str(bbox), } - results = [ dataset.get_identity(id) for id in ids ] - - print(results) + # print(results) return jsonify({ + 'query': query, 'results': results, - 'distances': distances.tolist(), - 'indexes': indexes.tolist(), + 'distances': dists, }) -@api.route('/dataset/<name>/name', methods=['GET']) -def name_lookup(dataset): + +@api.route('/dataset/<dataset_name>/name', methods=['GET','POST']) +def name_lookup(dataset_name): + """Find a name in the dataset""" start = time.time() - dataset = get_dataset(name) + dataset = get_dataset(dataset_name) - # we have a query from the request query string... - # use this to do a like* query on the identities_meta table + q = request.args.get('q') + # print(q) query = { + 'q': q, 'timing': time.time() - start, } - results = [] - - print(results) + results = dataset.search_name(q + '%') if q else None + + # print(results) return jsonify({ 'query': query, 'results': results, diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py index 0c28b315..55fed166 100644 --- a/megapixels/app/settings/app_cfg.py +++ b/megapixels/app/settings/app_cfg.py @@ -7,6 +7,8 @@ from dotenv import load_dotenv from app.settings import types from app.utils import click_utils +import codecs +codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else None) # ----------------------------------------------------------------------------- # Enun lists used for custom Click Params @@ -87,7 +89,7 @@ CKPT_ZERO_PADDING = 9 HASH_TREE_DEPTH = 3 HASH_BRANCH_SIZE = 3 -DLIB_FACEREC_JITTERS = 25 # number of face recognition jitters +DLIB_FACEREC_JITTERS = 5 # number of face recognition jitters DLIB_FACEREC_PADDING = 0.25 # default dlib POSE_MINMAX_YAW = (-25,25) diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index ecfae0cb..b3d3a8c2 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -64,7 +64,7 @@ def format_applet(section, s3_path): else: command = payload[0] opt = None - if command == 'python': + if command == 'python' or command == 'javascript' or command == 'code': return format_section([ section ], s3_path) applet['command'] = command |
