summaryrefslogtreecommitdiff
path: root/check/app/models/sql_factory.py
blob: 3ed3af064d76e55cbcd57af43b40565e42ad0633 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import glob
import time
import pandas as pd

from sqlalchemy import create_engine, Table, Column, String, BigInteger
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

from app.settings import app_cfg as cfg

connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format(
  os.getenv("DB_USER"),
  os.getenv("DB_PASS"),
  os.getenv("DB_HOST"),
  os.getenv("DB_NAME")
)

loaded = False
engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600)
Session = sessionmaker(bind=engine)

Base = declarative_base()

class PHashTable(Base):
  __tablename__ = 'phashes'
  id = Column(Integer, primary_key=True)
  sha256 = Column(String(36, convert_unicode=True), nullable=False)
  phash = Column(BigInteger(blank=True), nullable=False)
  ext = Column(String(3, convert_unicode=True), nullable=False)
  def toJSON(self):
    return {
      'id': self.id,
      'sha256': self.sha256,
      'phash': self.phash,
      'ext': self.ext,
    }

def search_by_phash(phash, threshold=6):
  connection = engine.connect()
  cmd = 'SELECT phashes.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM images_image HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1'
  matches = connection.execute(text(cmd), phash=phash, threshold=threshold)
  return matches

def add_phash(sha256, phash, ext):
  rec = PHashTable(
    sha256=sha256, phash=phash, ext=ext,
  )
  session = Session()
  session.add(rec)
  session.flush()