summaryrefslogtreecommitdiff
path: root/check/app/models/sql_factory.py
diff options
context:
space:
mode:
Diffstat (limited to 'check/app/models/sql_factory.py')
-rw-r--r--check/app/models/sql_factory.py26
1 files changed, 16 insertions, 10 deletions
diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py
index 1d32a68..ad27f62 100644
--- a/check/app/models/sql_factory.py
+++ b/check/app/models/sql_factory.py
@@ -32,24 +32,32 @@ class FileTable(Base):
__tablename__ = 'files'
id = Column(Integer, primary_key=True)
sha256 = Column(String(64, convert_unicode=True), nullable=False)
- phash = Column(BigInteger, nullable=False)
+ phash = Column(BigInteger, nullable=False, index=True)
ext = Column(String(4, convert_unicode=True), nullable=False)
+ url = Column(String(255, convert_unicode=True), nullable=False)
def toJSON(self):
return {
'id': self.id,
'sha256': self.sha256,
'phash': self.phash,
'ext': self.ext,
+ 'url': self.url,
}
Base.metadata.create_all(engine)
-def search_by_phash(phash, threshold=6):
+def search_by_phash(phash, threshold=6, limit=1):
"""Search files for a particular phash"""
connection = engine.connect()
- cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM files HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1'
- matches = connection.execute(text(cmd), phash=phash, threshold=threshold).fetchall()
- keys = ('id', 'sha256', 'phash', 'ext', 'score')
+ cmd = """
+ SELECT files.*, BIT_COUNT(phash ^ :phash)
+ AS hamming_distance FROM files
+ HAVING hamming_distance < :threshold
+ ORDER BY hamming_distance ASC
+ LIMIT :limit
+ """
+ matches = connection.execute(text(cmd), phash=phash, threshold=threshold, limit=limit).fetchall()
+ keys = ('id', 'sha256', 'phash', 'ext', 'url', 'score')
results = [ dict(zip(keys, values)) for values in matches ]
return results
@@ -58,11 +66,9 @@ def search_by_hash(hash):
match = session.query(FileTable).filter(FileTable.sha256 == hash)
return match.first()
-def add_phash(sha256, phash, ext):
+def add_phash(sha256=None, phash=None, ext=None, url=None):
"""Add a file to the table"""
- rec = FileTable(
- sha256=sha256, phash=phash, ext=ext,
- )
+ rec = FileTable(sha256=sha256, phash=phash, ext=ext, url=url)
session = Session()
session.add(rec)
session.commit()
@@ -87,4 +93,4 @@ def add_phash_by_filename(path):
hash = sha256(path)
- add_phash(sha256=hash, phash=phash, ext=ext)
+ add_phash(sha256=hash, phash=phash, ext=ext, url=path)