import os import glob import time import pandas as pd from sqlalchemy import create_engine, Table, Column, String, BigInteger from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base from app.settings import app_cfg as cfg connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( os.getenv("DB_USER"), os.getenv("DB_PASS"), os.getenv("DB_HOST"), os.getenv("DB_NAME") ) loaded = False engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600) Session = sessionmaker(bind=engine) Base = declarative_base() class PHashTable(Base): __tablename__ = 'phashes' id = Column(Integer, primary_key=True) sha256 = Column(String(36, convert_unicode=True), nullable=False) phash = Column(BigInteger(blank=True), nullable=False) ext = Column(String(3, convert_unicode=True), nullable=False) def toJSON(self): return { 'id': self.id, 'sha256': self.sha256, 'phash': self.phash, 'ext': self.ext, } def search_by_phash(phash, threshold=6): connection = engine.connect() cmd = 'SELECT phashes.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM images_image HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1' matches = connection.execute(text(cmd), phash=phash, threshold=threshold) return matches def add_phash(sha256, phash, ext): rec = PHashTable( sha256=sha256, phash=phash, ext=ext, ) session = Session() session.add(rec) session.flush()