diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-04-14 20:08:28 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-04-14 20:08:28 +0200 |
| commit | 890586175ff054cc01d39f9931ab2e4bae62c1e1 (patch) | |
| tree | 38abf826df54b3793cfb364a1dda9748f5d99ab6 /check | |
| parent | 61f169d109c9ba1c01ada06d830973c873d4e634 (diff) | |
search by phash
Diffstat (limited to 'check')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | check/app/models/__init__.py | 1 |
| -rw-r--r-- | check/app/models/sql_factory.py | 133 |
| -rw-r--r-- | check/app/server/__init__.py | 1 |
| -rw-r--r-- | check/app/server/api.py | 8 |
| -rw-r--r-- | check/app/settings/__init__.py | 1 |
5 files changed, 24 insertions, 120 deletions
diff --git a/check/app/models/__init__.py b/check/app/models/__init__.py index 93f5256..e69de29 100644 --- a/check/app/models/__init__.py +++ b/check/app/models/__init__.py @@ -1 +0,0 @@ -__init__.py
\ No newline at end of file diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index 499113d..3ed3af0 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -3,7 +3,7 @@ import glob import time import pandas as pd -from sqlalchemy import create_engine, Table, Column, String, BigInteger, Integer, DateTime, func +from sqlalchemy import create_engine, Table, Column, String, BigInteger from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base @@ -17,12 +17,13 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( ) loaded = False -Session = None +engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600) +Session = sessionmaker(bind=engine) Base = declarative_base() -class ImageHashTable(Base): - __tablename__ = 'imagehashes' +class PHashTable(Base): + __tablename__ = 'phashes' id = Column(Integer, primary_key=True) sha256 = Column(String(36, convert_unicode=True), nullable=False) phash = Column(BigInteger(blank=True), nullable=False) @@ -35,116 +36,16 @@ class ImageHashTable(Base): 'ext': self.ext, } -def load_sql_datasets(replace=False, base_model=None): - engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600) - # db.set_character_set('utf8') - # dbc = db.cursor() - # dbc.execute('SET NAMES utf8;') - # dbc.execute('SET CHARACTER SET utf8;') - # dbc.execute('SET character_set_connection=utf8;') - Session = sessionmaker(bind=engine) +def search_by_phash(phash, threshold=6): + connection = engine.connect() + cmd = 'SELECT phashes.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM images_image HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1' + matches = connection.execute(text(cmd), phash=phash, threshold=threshold) + return matches - for fn in glob.iglob(os.path.join(path, "*.csv")): - key = os.path.basename(fn).replace(".csv", "") - if replace: - print('loading dataset 
{}'.format(fn)) - df = pd.read_csv(fn) - # fix columns that are named "index", a sql reserved word - df.reindex_axis(sorted(df.columns), axis=1) - columns = [column.name for column in ImageHashTable.__table__.columns] - df.columns = columns - df.to_sql(name=ImageHashTable.__tablename__, con=engine, if_exists='replace', index=False) - return dataset - -class SqlDataset: - """ - Bridge between the facial information CSVs connected to the datasets, and MySQL - - each dataset should have files that can be loaded into these database models - - names will be fixed to work in SQL (index -> id) - - we can then have more generic models for fetching this info after doing a FAISS query - """ - def __init__(self, name, engine=None, base_model=None): - self.name = name - self.tables = {} - if base_model is None: - self.engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600) - base_model = declarative_base(engine) - self.base_model = base_model - - def describe(self): - """ - List the available SQL tables for a given dataset. - """ - return { - 'name': self.name, - 'tables': list(self.tables.keys()), - } - - def get_identity(self, id): - """ - Get an identity given an ID. - """ - # id += 1 - file_record_table = self.get_table('file_record') - file_record = file_record_table.query.filter(file_record_table.id == id).first() - - if not file_record: - return None - - identity_table = self.get_table('identity') - identity = identity_table.query.filter(identity_table.id == file_record.identity_id).first() - - if not identity: - return None - - return { - 'file_record': file_record.toJSON(), - 'identity': identity.toJSON(), - 'face_roi': self.select('face_roi', id), - 'face_pose': self.select('face_pose', id), - } - - def search_name(self, q): - """ - Find an identity by name. 
- """ - table = self.get_table('identity') - identity_list = table.query.filter(table.fullname.ilike(q)).order_by(table.fullname.desc()).limit(15) - return identity_list - - def search_description(self, q): - """ - Find an identity by description. - """ - table = self.get_table('identity') - identity_list = table.query.filter(table.description.ilike(q)).order_by(table.description.desc()).limit(15) - return identity_list - - def get_file_records_for_identities(self, identity_list): - """ - Given a list of identities, map these to file records. - """ - identities = [] - file_record_table = self.get_table('file_record') - for row in identity_list: - file_record = file_record_table.query.filter(file_record_table.identity_id == row.id).first() - if file_record: - identities.append({ - 'file_record': file_record.toJSON(), - 'identity': row.toJSON(), - }) - return identities - - def select(self, table, id): - """ - Perform a generic select. - """ - table = self.get_table(table) - if not table: - return None - session = Session() - # for obj in session.query(table).filter_by(id=id): - # print(table) - obj = session.query(table).filter(table.id == id).first() - session.close() - return obj.toJSON() +def add_phash(sha256, phash, ext): + rec = PHashTable( + sha256=sha256, phash=phash, ext=ext, + ) + session = Session() + session.add(rec) + session.flush() diff --git a/check/app/server/__init__.py b/check/app/server/__init__.py index 93f5256..e69de29 100644 --- a/check/app/server/__init__.py +++ b/check/app/server/__init__.py @@ -1 +0,0 @@ -__init__.py
\ No newline at end of file diff --git a/check/app/server/api.py b/check/app/server/api.py index 620e604..5c5b4bf 100644 --- a/check/app/server/api.py +++ b/check/app/server/api.py @@ -16,5 +16,11 @@ api = Blueprint('api', __name__) @api.route('/') def index(): - """List the datasets and their fields""" + """API status noop""" + return jsonify({ 'status': 'ok' }) + +@api.route('/v1/match/') +def match(): + """Search by image""" + # get threshold return jsonify({ 'status': 'ok' }) diff --git a/check/app/settings/__init__.py b/check/app/settings/__init__.py index 93f5256..e69de29 100644 --- a/check/app/settings/__init__.py +++ b/check/app/settings/__init__.py @@ -1 +0,0 @@ -__init__.py
\ No newline at end of file
