diff options
Diffstat (limited to 'check/app/models/sql_factory.py')
| -rw-r--r-- | check/app/models/sql_factory.py | 26 |
1 files changed, 16 insertions, 10 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index 1d32a68..ad27f62 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -32,24 +32,32 @@ class FileTable(Base): __tablename__ = 'files' id = Column(Integer, primary_key=True) sha256 = Column(String(64, convert_unicode=True), nullable=False) - phash = Column(BigInteger, nullable=False) + phash = Column(BigInteger, nullable=False, index=True) ext = Column(String(4, convert_unicode=True), nullable=False) + url = Column(String(255, convert_unicode=True), nullable=False) def toJSON(self): return { 'id': self.id, 'sha256': self.sha256, 'phash': self.phash, 'ext': self.ext, + 'url': self.url, } Base.metadata.create_all(engine) -def search_by_phash(phash, threshold=6): +def search_by_phash(phash, threshold=6, limit=1): """Search files for a particular phash""" connection = engine.connect() - cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM files HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1' - matches = connection.execute(text(cmd), phash=phash, threshold=threshold).fetchall() - keys = ('id', 'sha256', 'phash', 'ext', 'score') + cmd = """ + SELECT files.*, BIT_COUNT(phash ^ :phash) + AS hamming_distance FROM files + HAVING hamming_distance < :threshold + ORDER BY hamming_distance ASC + LIMIT :limit + """ + matches = connection.execute(text(cmd), phash=phash, threshold=threshold, limit=limit).fetchall() + keys = ('id', 'sha256', 'phash', 'ext', 'url', 'score') results = [ dict(zip(keys, values)) for values in matches ] return results @@ -58,11 +66,9 @@ def search_by_hash(hash): match = session.query(FileTable).filter(FileTable.sha256 == hash) return match.first() -def add_phash(sha256, phash, ext): +def add_phash(sha256=None, phash=None, ext=None, url=None): """Add a file to the table""" - rec = FileTable( - sha256=sha256, phash=phash, ext=ext, - ) + rec = FileTable(sha256=sha256, phash=phash, ext=ext, url=url) session = Session() session.add(rec) session.commit() @@ -87,4 +93,4 @@ def add_phash_by_filename(path): hash = sha256(path) - add_phash(sha256=hash, phash=phash, ext=ext) + add_phash(sha256=hash, phash=phash, ext=ext, url=path) |
