diff options
Diffstat (limited to 'check/app')
| -rw-r--r-- | check/app/__init__.py | 0 | ||||
| -rw-r--r-- | check/app/models/click_factory.py | 145 | ||||
| -rw-r--r-- | check/app/models/sql_factory.py | 50 | ||||
| -rw-r--r-- | check/app/settings/app_cfg.py | 45 | ||||
| -rw-r--r-- | check/app/settings/types.py | 10 | ||||
| -rw-r--r-- | check/app/utils/im_utils.py | 3 | ||||
| -rw-r--r-- | check/app/utils/logger_utils.py | 68 |
7 files changed, 312 insertions, 9 deletions
diff --git a/check/app/__init__.py b/check/app/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/check/app/__init__.py diff --git a/check/app/models/click_factory.py b/check/app/models/click_factory.py new file mode 100644 index 0000000..61a3b5e --- /dev/null +++ b/check/app/models/click_factory.py @@ -0,0 +1,145 @@ +""" +Click processor factory +- Inspired by and used code from @wiretapped's HTSLAM codebase +- In particular the very useful +""" + +import os +import sys +from os.path import join +from pathlib import Path +import os +from os.path import join +import sys +from functools import update_wrapper, wraps +import itertools +from pathlib import Path +from glob import glob +import importlib +import logging + +import click +from app.settings import app_cfg as cfg + + +# -------------------------------------------------------- +# Click Group Class +# -------------------------------------------------------- + +# set global variable during parent class create +dir_plugins = None # set in create + +class ClickComplex: + """Wrapper generator for custom Click CLI's based on LR's coroutine""" + + def __init__(self): + pass + + + class CustomGroup(click.Group): + #global dir_plugins # from CliGenerator init + + # lists commands in plugin directory + def list_commands(self, ctx): + global dir_plugins # from CliGenerator init + rv = list(self.commands.keys()) + fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \ + if str(x).endswith('.py') \ + and '__init__' not in str(x)] + for fp_cmd in fp_cmds: + try: + assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name) + except Exception as ex: + logging.getLogger('app').error('{}'.format(ex)) + rv.append(fp_cmd.stem) + rv.sort() + return rv + + # Complex version: gets commands in directory and in this file + # Based on code from @wiretapped + HTSLAM + def get_command(self, ctx, cmd_name): + global dir_plugins + if cmd_name in self.commands: + return self.commands[cmd_name] + ns = {} + fpp_cmd = Path(dir_plugins, cmd_name + '.py') + fp_cmd = fpp_cmd.as_posix() + if not fpp_cmd.exists(): + sys.exit('[-] {} file does not exist'.format(fpp_cmd)) + code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec') + try: + eval(code, ns, ns) + except Exception as ex: + logging.getLogger('vframe').error('exception: {}'.format(ex)) + @click.command() + def _fail(): + raise Exception('while loading {}'.format(fpp_cmd.name)) + _fail.short_help = repr(ex) + _fail.help = repr(ex) + return _fail + if 'cli' not in ns: + sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd)) + return ns['cli'] + + @classmethod + def create(self, dir_plugins_local): + global dir_plugins + dir_plugins = dir_plugins_local + return self.CustomGroup + + + +class ClickSimple: + """Wrapper generator for custom Click CLI's""" + + def __init__(self): + pass + + + class CustomGroup(click.Group): + #global dir_plugins # from CliGenerator init + + # lists commands in plugin directory + def list_commands(self, ctx): + global dir_plugins # from CliGenerator init + rv = list(self.commands.keys()) + fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \ + if str(x).endswith('.py') \ + and '__init__' not in str(x)] + for fp_cmd in fp_cmds: + assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name) + rv.append(fp_cmd.stem) + rv.sort() + return rv + + # Complex version: gets commands in directory and in this file + # from HTSLAM + def get_command(self, ctx, cmd_name): + global dir_plugins # from CliGenerator init + if cmd_name in self.commands: + return self.commands[cmd_name] + ns = {} + fpp_cmd = Path(dir_plugins, cmd_name + '.py') + fp_cmd = fpp_cmd.as_posix() + if not fpp_cmd.exists(): + sys.exit('[-] {} file does not exist'.format(fpp_cmd)) + code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec') + try: + eval(code, ns, ns) + except Exception as ex: + logging.getLogger('vframe').error('exception: {}'.format(ex)) + @click.command() + def _fail(): + raise Exception('while loading {}'.format(fpp_cmd.name)) + _fail.short_help = repr(ex) + _fail.help = repr(ex) + return _fail + if 'cli' not in ns: + sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd)) + return ns['cli'] + + @classmethod + def create(self, dir_plugins_local): + global dir_plugins + dir_plugins = dir_plugins_local + return self.CustomGroup diff --git a/check/app/models/sql_factory.py b/check/app/models/sql_factory.py index d4a371e..1d32a68 100644 --- a/check/app/models/sql_factory.py +++ b/check/app/models/sql_factory.py @@ -3,11 +3,17 @@ import glob import time import pandas as pd -from sqlalchemy import create_engine, Table, Column, String, Integer, BigInteger +from PIL import Image + +from sqlalchemy import create_engine, Table, Column, String, Integer, BigInteger, text from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.declarative import declarative_base from app.settings import app_cfg as cfg +from app.settings.types import VALID_IMAGE_EXTENSIONS + +from app.utils.im_utils import compute_phash_int +from app.utils.file_utils import sha256 connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( os.getenv("DB_USER"), @@ -19,16 +25,15 @@ connection_url = "mysql+mysqlconnector://{}:{}@{}/{}?charset=utf8mb4".format( loaded = False engine = create_engine(connection_url, encoding="utf-8", pool_recycle=3600) Session = sessionmaker(bind=engine) - Base = declarative_base() class FileTable(Base): """Table for storing various hashes of images""" __tablename__ = 'files' id = Column(Integer, primary_key=True) - sha256 = Column(String(36, convert_unicode=True), nullable=False) + sha256 = Column(String(64, convert_unicode=True), nullable=False) phash = Column(BigInteger, nullable=False) - ext = Column(String(3, convert_unicode=True), nullable=False) + ext = Column(String(4, convert_unicode=True), nullable=False) def toJSON(self): return { 'id': self.id, @@ -37,12 +42,21 @@ class FileTable(Base): 'ext': self.ext, } +Base.metadata.create_all(engine) + def search_by_phash(phash, threshold=6): """Search files for a particular phash""" connection = engine.connect() - cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM images_image HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1' - matches = connection.execute(text(cmd), phash=phash, threshold=threshold) - return matches + cmd = 'SELECT files.*, BIT_COUNT(phash ^ :phash) as hamming_distance FROM files HAVING hamming_distance < :threshold ORDER BY hamming_distance ASC LIMIT 1' + matches = connection.execute(text(cmd), phash=phash, threshold=threshold).fetchall() + keys = ('id', 'sha256', 'phash', 'ext', 'score') + results = [ dict(zip(keys, values)) for values in matches ] + return results + +def search_by_hash(hash): + session = Session() + match = session.query(FileTable).filter(FileTable.sha256 == hash) + return match.first() def add_phash(sha256, phash, ext): """Add a file to the table""" @@ -51,4 +65,26 @@ def add_phash(sha256, phash, ext): ) session = Session() session.add(rec) + session.commit() session.flush() + +def add_phash_by_filename(path): + """Add a file by filename, getting all the necessary attributes""" + print(path) + if not os.path.exists(path): + print("File does not exist") + return + + dir, fn = os.path.split(path) + root, ext = os.path.splitext(fn) + ext = ext.strip('.') + if ext not in VALID_IMAGE_EXTENSIONS: + print("Not an image file") + return + + im = Image.open(path).convert('RGB') + phash = compute_phash_int(im) + + hash = sha256(path) + + add_phash(sha256=hash, phash=phash, ext=ext) diff --git a/check/app/settings/app_cfg.py b/check/app/settings/app_cfg.py index 904e0d4..ce5b5d8 100644 --- a/check/app/settings/app_cfg.py +++ b/check/app/settings/app_cfg.py @@ -14,7 +14,7 @@ codecs.register(lambda name: codecs.lookup('utf8') if name == 'utf8mb4' else Non # ----------------------------------------------------------------------------- # click settings # ----------------------------------------------------------------------------- -DIR_COMMANDS_CHECK = 'commands/check' +DIR_COMMANDS_PHASH = 'commands/phash' # ----------------------------------------------------------------------------- # S3 storage @@ -29,3 +29,46 @@ S3_HTTP_METADATA_URL = join(S3_HTTP_URL, 'metadata') # ----------------------------------------------------------------------------- CELERY_BROKER_URL = 'redis://localhost:6379/0' CELERY_RESULT_BACKEND = 'redis://localhost:6379/0' + +# ----------------------------------------------------------------------------- +# Logging options exposed for custom click Params +# ----------------------------------------------------------------------------- +LOGGER_NAME = 'app' +LOGLEVELS = { + types.LogLevel.DEBUG: logging.DEBUG, + types.LogLevel.INFO: logging.INFO, + types.LogLevel.WARN: logging.WARN, + types.LogLevel.ERROR: logging.ERROR, + types.LogLevel.CRITICAL: logging.CRITICAL +} +LOGLEVEL_OPT_DEFAULT = types.LogLevel.DEBUG.name +#LOGFILE_FORMAT = "%(asctime)s: %(levelname)s: %(message)s" +#LOGFILE_FORMAT = "%(levelname)s:%(name)s: %(message)s" +#LOGFILE_FORMAT = "%(levelname)s: %(message)s" +#LOGFILE_FORMAT = "%(filename)s:%(lineno)s %(funcName)s() %(message)s" +# colored logs +""" +black, red, green, yellow, blue, purple, cyan and white. +{color}, fg_{color}, bg_{color}: Foreground and background colors. +bold, bold_{color}, fg_bold_{color}, bg_bold_{color}: Bold/bright colors. +reset: Clear all formatting (both foreground and background colors). +""" +LOGFILE_FORMAT = "%(log_color)s%(levelname)-8s%(reset)s %(cyan)s%(filename)s:%(lineno)s:%(bold_cyan)s%(funcName)s() %(reset)s%(message)s" + +LogLevelVar = click_utils.ParamVar(types.LogLevel) + +# ----------------------------------------------------------------------------- +# Filesystem settings +# hash trees enforce a maximum number of directories per directory +# ----------------------------------------------------------------------------- +ZERO_PADDING = 6 # padding for enumerated image filenames +#FRAME_NAME_ZERO_PADDING = 6 # is this active?? +CKPT_ZERO_PADDING = 9 +HASH_TREE_DEPTH = 3 +HASH_BRANCH_SIZE = 3 + +# ----------------------------------------------------------------------------- +# .env config for keys +# ----------------------------------------------------------------------------- +# DIR_DOTENV = join(DIR_APP, '.env') +load_dotenv() # dotenv_path=DIR_DOTENV) diff --git a/check/app/settings/types.py b/check/app/settings/types.py index e3e9b4e..044579b 100644 --- a/check/app/settings/types.py +++ b/check/app/settings/types.py @@ -5,3 +5,13 @@ def find_type(name, enum_type): if name == enum_opt.name.lower(): return enum_opt return None + +# --------------------------------------------------------------------- +# Logger, monitoring +# -------------------------------------------------------------------- + +class LogLevel(Enum): + """Loger vebosity""" + DEBUG, INFO, WARN, ERROR, CRITICAL = range(5) + +VALID_IMAGE_EXTENSIONS = ['gif', 'jpg', 'jpeg', 'png'] diff --git a/check/app/utils/im_utils.py b/check/app/utils/im_utils.py index dfd5739..747e900 100644 --- a/check/app/utils/im_utils.py +++ b/check/app/utils/im_utils.py @@ -15,6 +15,7 @@ import torch import torch.nn as nn import torchvision.models as models import torchvision.transforms as transforms +import struct from torch.autograd import Variable from sklearn.metrics.pairwise import cosine_similarity import datetime @@ -214,7 +215,7 @@ def phash2int(phash): :returns: binary-encoded bigint """ phash.hash[-1] = False - phash_as_bigint = struct.unpack('Q', numpy.packbits(phash.hash))[0] + phash_as_bigint = struct.unpack('Q', np.packbits(phash.hash))[0] return phash_as_bigint def compute_phash_int(im): diff --git a/check/app/utils/logger_utils.py b/check/app/utils/logger_utils.py new file mode 100644 index 0000000..d4f962e --- /dev/null +++ b/check/app/utils/logger_utils.py @@ -0,0 +1,68 @@ +""" +Logger instantiator for use with Click utlity scripts +""" +import sys +import os +import logging + +import colorlog + +from app.settings import app_cfg as cfg + + +class Logger: + + logger_name = 'app' + + def __init__(self): + pass + + @staticmethod + def create(verbosity=4, logfile=None): + """Configures a logger from click params + :param verbosity: (int) between 0 and 5 + :param logfile: (str) path to logfile + :returns: logging root object + """ + + loglevel = (5 - (max(0, min(verbosity, 5)))) * 10 # where logging.DEBUG = 10 + date_format = '%Y-%m-%d %H:%M:%S' + if 'colorlog' in sys.modules and os.isatty(2): + cformat = '%(log_color)s' + cfg.LOGFILE_FORMAT + f = colorlog.ColoredFormatter(cformat, date_format, + log_colors = { 'DEBUG' : 'yellow', 'INFO' : 'white', + 'WARNING' : 'bold_yellow', 'ERROR': 'bold_red', + 'CRITICAL': 'bold_red' }) + else: + f = logging.Formatter(cfg.LOGFILE_FORMAT, date_format) + + # logger = logging.getLogger(Logger.logger_name) + logger = logging.getLogger(cfg.LOGGER_NAME) + logger.setLevel(loglevel) + + if logfile: + # create file handler which logs even debug messages + fh = logging.FileHandler(logfile) + fh.setLevel(loglevel) + logger.addHandler(fh) + + # add colored handler + ch = logging.StreamHandler() + ch.setFormatter(f) + logger.addHandler(ch) + + if verbosity == 0: + logger.disabled = True + + # test + # logger.debug('Hello Debug') + # logger.info('Hello Info') + # logger.warn('Hello Warn') + # logger.error('Hello Error') + # logger.critical('Hello Critical') + + return logger + + @staticmethod + def getLogger(): + return logging.getLogger(cfg.LOGGER_NAME)
\ No newline at end of file |
