diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-04-15 16:26:03 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-04-15 16:26:03 +0200 |
| commit | 79f0e696f3f6067a0841a37404fb546dedaa07cb (patch) | |
| tree | a064f2841dc532f81fcf04eb84300e679fda2b27 /check/app/models/sql_factory.py | |
| parent | e257e83f313a2976347b0a30f58e66b7bcbc1235 (diff) | |
cli suite working
Diffstat (limited to 'check/app/models/sql_factory.py')
| -rw-r--r-- | check/app/models/sql_factory.py | 50 |
1 file changed, 43 insertions, 7 deletions
```diff
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index d4a371e..1d32a68 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -3,11 +3,17 @@
 import glob
 import time
 import pandas as pd
-from sqlalchemy import create_engine, Table, Column, String, Integer, BigInteger
+from PIL import Image
+
+from sqlalchemy import create_engine, Table, Column, String, Integer, BigInteger, text
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy.ext.declarative import declarative_base
 
 from app.settings import app_cfg as cfg
+from app.settings.types import VALID_IMAGE_EXTENSIONS
+
+from app.utils.im_utils import compute_phash_int
+from app.utils.file_utils import sha256
 
 connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
   os.getenv("DB_USER"),
@@ -19,16 +25,15 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
 loaded = False
 engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600)
 Session = sessionmaker(bind=engine)
-
 Base = declarative_base()
 
 class FileTable(Base):
   """Table for storing various hashes of images"""
   __tablename__ = 'files'
   id = Column(Integer, primary_key=True)
-  sha256 = Column(String(36, convert_unicode=True), nullable=False)
+  sha256 = Column(String(64, convert_unicode=True), nullable=False)
   phash = Column(BigInteger, nullable=False)
-  ext = Column(String(3, convert_unicode=True), nullable=False)
+  ext = Column(String(4, convert_unicode=True), nullable=False)
   def toJSON(self):
     return {
       'id': self.id,
@@ -37,12 +42,21 @@ class FileTable(Base):
       'ext': self.ext,
     }
 
+Base.metadata.create_all(engine)
+
 def search_by_phash(phash, threshold=6):
   """Search files for a particular phash"""
   connection = engine.connect()
-  cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM images_image HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1'
-  matches = connection.execute(text(cmd), phash=phash, threshold=threshold)
-  return matches
+  cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM files HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1'
+  matches = connection.execute(text(cmd), phash=phash, threshold=threshold).fetchall()
+  keys = ('id', 'sha256', 'phash', 'ext', 'score')
+  results = [ dict(zip(keys, values)) for values in matches ]
+  return results
+
+def search_by_hash(hash):
+  session = Session()
+  match = session.query(FileTable).filter(FileTable.sha256 == hash)
+  return match.first()
 
 def add_phash(sha256, phash, ext):
   """Add a file to the table"""
@@ -51,4 +65,26 @@ def add_phash(sha256, phash, ext):
   )
   session = Session()
   session.add(rec)
+  session.commit()
   session.flush()
+
+def add_phash_by_filename(path):
+  """Add a file by filename, getting all the necessary attributes"""
+  print(path)
+  if not os.path.exists(path):
+    print("File does not exist")
+    return
+
+  dir, fn = os.path.split(path)
+  root, ext = os.path.splitext(fn)
+  ext = ext.strip('.')
+  if ext not in VALID_IMAGE_EXTENSIONS:
+    print("Not an image file")
+    return
+
+  im = Image.open(path).convert('RGB')
+  phash = compute_phash_int(im)
+
+  hash = sha256(path)
+
+  add_phash(sha256=hash, phash=phash, ext=ext)
```
