From d9c3928e542faabaf8a9cb3d235029939cb65f09 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sun, 1 Dec 2019 11:07:59 +0100 Subject: utilz --- cli/app/utils/api_utils.py | 170 ++++++++++++ cli/app/utils/click_factory.py | 145 +++++++++++ cli/app/utils/click_utils.py | 62 +++++ cli/app/utils/color_utils.py | 5 + cli/app/utils/display_utils.py | 28 ++ cli/app/utils/draw_utils.py | 215 ++++++++++++++++ cli/app/utils/file_utils.py | 461 +++++++++++++++++++++++++++++++++ cli/app/utils/identity_utils.py | 161 ++++++++++++ cli/app/utils/im_utils.py | 556 ++++++++++++++++++++++++++++++++++++++++ cli/app/utils/logger_utils.py | 68 +++++ cli/app/utils/plot_utils.py | 149 +++++++++++ cli/app/utils/sheet_utils.py | 82 ++++++ 12 files changed, 2102 insertions(+) create mode 100644 cli/app/utils/api_utils.py create mode 100644 cli/app/utils/click_factory.py create mode 100644 cli/app/utils/click_utils.py create mode 100644 cli/app/utils/color_utils.py create mode 100644 cli/app/utils/display_utils.py create mode 100644 cli/app/utils/draw_utils.py create mode 100644 cli/app/utils/file_utils.py create mode 100644 cli/app/utils/identity_utils.py create mode 100644 cli/app/utils/im_utils.py create mode 100644 cli/app/utils/logger_utils.py create mode 100644 cli/app/utils/plot_utils.py create mode 100644 cli/app/utils/sheet_utils.py (limited to 'cli/app/utils') diff --git a/cli/app/utils/api_utils.py b/cli/app/utils/api_utils.py new file mode 100644 index 0000000..a4dad50 --- /dev/null +++ b/cli/app/utils/api_utils.py @@ -0,0 +1,170 @@ +import json +import urllib +import urllib.request + +from app.settings import app_cfg +from app.utils import logger_utils + + +class WikipediaAPI: + + url_base = 'https://en.wikipedia.org/w/api.php' + log = logger_utils.Logger.getLogger() + # https://en.wikipedia.org/w/api.php?redirects=& + # ppprop=displaytitle&prop=pageprops|pageimages|description&generator=prefixsearch + # &action=query&format=json&piprop=thumbnail&pithumbsize=160&pilimit=6&gpssearch=Vicente+Fox&gpsnamespace=0&gpslimit=6 + + def _url_builder(self, q): + + # https://www.mediawiki.org/wiki/API%3aProperties#Info%3a_Parameters + params = { + 'redirects': '', + 'ppprop': 'displaytitle', + 'prop': 'pageprops|pageimages|description', + 'generator': 'prefixsearch', + 'action': 'query', + 'format': 'json', + 'piprop': 'thumbnail', + #'pithumbsize': 160, + 'pilimit': 1, + 'gpssearch': q, + 'gpsnamespace': 0, + 'gpslimit': 1 + } + url = f'{self.url_base}?{urllib.parse.urlencode(params)}' + return url + + def _api_search(self, url): + # set empty object + obj = { + 'wp_description': '', + 'wp_page_id': '', + 'wp_name': '' + } + try: + json_response = urllib.request.urlopen(url).read() + response = json.loads(json_response) + obj['wp_accessed'] = True + query = response.get('query', None) + if query: + pages = query.get('pages',[]) + if pages: + page_id= list(pages.keys())[0] + if int(page_id) != -1: + page = pages[page_id] + # populate with successful result + obj['wp_name'] = page['title'] + obj['wp_page_id'] = page_id + obj['wp_description'] = page.get('description', '') # not always available + # if fail, return None + except Exception as e: + obj['wp_error'] = e + obj['wp_accessed'] = False + return obj + + def get_meta(self, query_obj, verbose=False): + '''Searches Wikipedia API for query string''' + + if query_obj.get('wp_accessed', False): + return query_obj + else: + url = self._url_builder(query_obj['query']) + if verbose: + self.log.debug(f'querying: {url}') + print(url) + return self._api_search(url) + 
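+  # Example usage of this class (illustrative sketch, not called in this module):
+  #
+  #   wp = WikipediaAPI()
+  #   meta = wp.get_meta({'query': 'Vicente Fox'})   # or: wp.search('Vicente Fox')
+  #   print(meta['wp_name'], meta['wp_page_id'], meta['wp_description'])
+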
+ def search(self, q): + '''Searches Wikipedia API for query string''' + url = self._url_builder(q) + return self._api_search(url) + + +class GoogleKnowledgeGraph: + + url_kg_api = 'https://kgsearch.googleapis.com/v1/entities:search' + log = logger_utils.Logger.getLogger() + fp_api_key = app_cfg.FP_KNOWLEDGE_GRAPH_ENV + + def __init__(self, api_key=None): + if api_key is not None: + self._api_key = api_key + else: + self._api_key = open(self.fp_api_key).read() + + + def _get_kg_meta(self, result_obj, params): + + params['indent'] = True # JSON indent + params['key'] = self._api_key + params['limit'] = 1 + ''' + Restricts returned entities to those of the specified types. + For example, you can specify `Person` (as defined in http://schema.org/Person) + to restrict the results to entities representing people. + If multiple types are specified, returned entities will contain one or more of these type''' + params['types'] = 'Person' + + '''Enables prefix (initial substring) match against names and + aliases of entities. For example, a prefix `Jung` will match entities + and aliases such as `Jung`, `Jungle`, and `Jung-ho Kang`.''' + params['prefix'] = False + + url = f'{self.url_kg_api}?{urllib.parse.urlencode(params)}' + try: + json_response = urllib.request.urlopen(url).read() + except Exception as e: + result_obj['kg_error'] = str(e) + result_obj['kg_accessed'] = False + else: + response = json.loads(json_response) + items = response.get('itemListElement', []) + if items: + item = items[0] + item_result = item.get('result', []) + result_obj['kg_url'] = item.get('url', '') + result_obj['kg_description'] = item_result.get('description', '') + result_obj['kg_id'] = item_result.get('@id', '').replace('kg:','') + result_obj['kg_name'] = item_result.get('name', '') + result_obj['kg_score'] = item.get('resultScore', 0.0) + det_desc = item_result.get('detailedDescription', '') + if det_desc: + result_obj['kg_bio'] = det_desc.get('articleBody','') + result_obj['kg_bio_url'] = det_desc.get('url','') + else: + result_obj['kg_bio'] = '' + result_obj['kg_bio_url'] = '' + result_img = item_result.get('image', '') + if result_img: + result_obj['kg_image_url'] = result_img.get('contentUrl', '') + else: + result_obj['kg_image_url'] = '' + result_obj['kg_error'] = '' + else: + # search was valid but no results + result_obj['kg_url'] = '' + result_obj['kg_description'] = '' + result_obj['kg_id'] = '' + result_obj['kg_name'] = '' + result_obj['kg_score'] = 0 + result_obj['kg_bio'] = '' + result_obj['kg_bio_url'] = '' + result_obj['kg_image_url'] = '' + + result_obj['kg_accessed'] = True + + return result_obj + + + def get_kg_from_name(self, obj): + if obj.get('kg_accessed', False): + return obj + params = {'query': obj['query']} + return self._get_kg_meta(obj, params) + + + def get_kg_from_kg_id(self, obj): + if obj.get('kg_accessed', False): + return obj + params = {'ids': obj['kg_ig']} + return self._get_kg_meta(obj, params) diff --git a/cli/app/utils/click_factory.py b/cli/app/utils/click_factory.py new file mode 100644 index 0000000..61a3b5e --- /dev/null +++ b/cli/app/utils/click_factory.py @@ -0,0 +1,145 @@ +""" +Click processor factory +- Inspired by and used code from @wiretapped's HTSLAM codebase +- In particular the very useful +""" + +import os +import sys +from os.path import join +from pathlib import Path +import os +from os.path import join +import sys +from functools import update_wrapper, wraps +import itertools +from pathlib import Path +from glob import glob +import importlib +import logging 
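+# Example wiring (illustrative sketch): a root CLI whose subcommands are loaded
+# from per-command plugin files by the custom Group classes defined below.
+# `cfg.DIR_COMMANDS` is a hypothetical plugin directory; any folder of .py files
+# that each expose a `cli` function would work.
+#
+#   from app.utils.click_factory import ClickComplex
+#
+#   @click.group(cls=ClickComplex.create(cfg.DIR_COMMANDS))
+#   def cli():
+#       pass
+#
+#   if __name__ == '__main__':
+#       cli()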
+ +import click +from app.settings import app_cfg as cfg + + +# -------------------------------------------------------- +# Click Group Class +# -------------------------------------------------------- + +# set global variable during parent class create +dir_plugins = None # set in create + +class ClickComplex: + """Wrapper generator for custom Click CLI's based on LR's coroutine""" + + def __init__(self): + pass + + + class CustomGroup(click.Group): + #global dir_plugins # from CliGenerator init + + # lists commands in plugin directory + def list_commands(self, ctx): + global dir_plugins # from CliGenerator init + rv = list(self.commands.keys()) + fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \ + if str(x).endswith('.py') \ + and '__init__' not in str(x)] + for fp_cmd in fp_cmds: + try: + assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name) + except Exception as ex: + logging.getLogger('app').error('{}'.format(ex)) + rv.append(fp_cmd.stem) + rv.sort() + return rv + + # Complex version: gets commands in directory and in this file + # Based on code from @wiretapped + HTSLAM + def get_command(self, ctx, cmd_name): + global dir_plugins + if cmd_name in self.commands: + return self.commands[cmd_name] + ns = {} + fpp_cmd = Path(dir_plugins, cmd_name + '.py') + fp_cmd = fpp_cmd.as_posix() + if not fpp_cmd.exists(): + sys.exit('[-] {} file does not exist'.format(fpp_cmd)) + code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec') + try: + eval(code, ns, ns) + except Exception as ex: + logging.getLogger('vframe').error('exception: {}'.format(ex)) + @click.command() + def _fail(): + raise Exception('while loading {}'.format(fpp_cmd.name)) + _fail.short_help = repr(ex) + _fail.help = repr(ex) + return _fail + if 'cli' not in ns: + sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd)) + return ns['cli'] + + @classmethod + def create(self, dir_plugins_local): + global dir_plugins + dir_plugins = dir_plugins_local + return self.CustomGroup + + + +class ClickSimple: + """Wrapper generator for custom Click CLI's""" + + def __init__(self): + pass + + + class CustomGroup(click.Group): + #global dir_plugins # from CliGenerator init + + # lists commands in plugin directory + def list_commands(self, ctx): + global dir_plugins # from CliGenerator init + rv = list(self.commands.keys()) + fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \ + if str(x).endswith('.py') \ + and '__init__' not in str(x)] + for fp_cmd in fp_cmds: + assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name) + rv.append(fp_cmd.stem) + rv.sort() + return rv + + # Complex version: gets commands in directory and in this file + # from HTSLAM + def get_command(self, ctx, cmd_name): + global dir_plugins # from CliGenerator init + if cmd_name in self.commands: + return self.commands[cmd_name] + ns = {} + fpp_cmd = Path(dir_plugins, cmd_name + '.py') + fp_cmd = fpp_cmd.as_posix() + if not fpp_cmd.exists(): + sys.exit('[-] {} file does not exist'.format(fpp_cmd)) + code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec') + try: + eval(code, ns, ns) + except Exception as ex: + logging.getLogger('vframe').error('exception: {}'.format(ex)) + @click.command() + def _fail(): + raise Exception('while loading {}'.format(fpp_cmd.name)) + _fail.short_help = repr(ex) + _fail.help = repr(ex) + return _fail + if 'cli' not in ns: + sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd)) + return ns['cli'] + + @classmethod + def 
create(self, dir_plugins_local): + global dir_plugins + dir_plugins = dir_plugins_local + return self.CustomGroup diff --git a/cli/app/utils/click_utils.py b/cli/app/utils/click_utils.py new file mode 100644 index 0000000..dc00f58 --- /dev/null +++ b/cli/app/utils/click_utils.py @@ -0,0 +1,62 @@ +""" +Custom Click parameter types +""" +import click + +from app.settings import app_cfg as cfg +from app.settings import types + + +# -------------------------------------------------------- +# Click command helpers +# -------------------------------------------------------- +def enum_to_names(enum_type): + return {x.name.lower(): x for x in enum_type} + +def show_help(enum_type): + names = enum_to_names(enum_type) + return 'Options: "{}"'.format(', '.join(list(names.keys()))) + +def get_default(opt): + return opt.name.lower() + + +# -------------------------------------------------------- +# Custom Click parameter class +# -------------------------------------------------------- + + +class ParamVar(click.ParamType): + + name = 'default_type' + + def __init__(self, param_type): + # self.name = '{}'.format(param_type.name.lower()) + # sealf. + self.ops = {x.name.lower(): x for x in param_type} + + def convert(self, value, param, ctx): + """converts (str) repr to Enum hash""" + try: + return self.ops[value.lower()] + except: + self.fail('{} is not a valid option'.format(value, param, ctx)) + + + + + + + + + + + + + + + + + + + diff --git a/cli/app/utils/color_utils.py b/cli/app/utils/color_utils.py new file mode 100644 index 0000000..e4d33ef --- /dev/null +++ b/cli/app/utils/color_utils.py @@ -0,0 +1,5 @@ +import colorsys + + +def hsv2rgb_int(h,s,v): + return tuple(round(i * 255) for i in colorsys.hsv_to_rgb(h,s,v)) \ No newline at end of file diff --git a/cli/app/utils/display_utils.py b/cli/app/utils/display_utils.py new file mode 100644 index 0000000..8e265ae --- /dev/null +++ b/cli/app/utils/display_utils.py @@ -0,0 +1,28 @@ +import sys + +import cv2 as cv + +from app.utils.logger_utils import Logger + + +log = Logger.getLogger() + +def handle_keyboard(delay_amt=1): + '''Used with cv.imshow('title', image) to wait for keyboard press + ''' + while True: + k = cv.waitKey(delay_amt) & 0xFF + if k == 27 or k == ord('q'): # ESC + cv.destroyAllWindows() + sys.exit() + elif k == 32 or k == 83: # 83 = right arrow + break + elif k != 255: + log.debug(f'k: {k}') + +def handle_keyboard_video(delay_amt=1): + key = cv.waitKey(1) & 0xFF + # if the `q` key was pressed, break from the loop + if key == ord("q"): + cv.destroyAllWindows() + sys.exit() diff --git a/cli/app/utils/draw_utils.py b/cli/app/utils/draw_utils.py new file mode 100644 index 0000000..18c03f2 --- /dev/null +++ b/cli/app/utils/draw_utils.py @@ -0,0 +1,215 @@ +import sys +from math import sqrt + +import numpy as np +import cv2 as cv +import PIL +from PIL import ImageDraw + +from app.utils import im_utils +from app.settings import app_cfg + +log = app_cfg.LOG + +end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1 + +# --------------------------------------------------------------------------- +# +# 3D landmark drawing utilities +# +# --------------------------------------------------------------------------- + +def plot_keypoints(im, kpts): + '''Draw 68 key points + :param im: the input im + :param kpts: (68, 3). 
flattened list + ''' + im = im.copy() + kpts = np.round(kpts).astype(np.int32) + for i in range(kpts.shape[0]): + st = kpts[i, :2] + im = cv.circle(im, (st[0], st[1]), 1, (0, 0, 255), 2) + if i in end_list: + continue + ed = kpts[i + 1, :2] + im = cv.line(im, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1) + return im + + +def calc_hypotenuse(pts): + bbox = [min(pts[0, :]), min(pts[1, :]), max(pts[0, :]), max(pts[1, :])] + center = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + radius = max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 2 + bbox = [center[0] - radius, center[1] - radius, center[0] + radius, center[1] + radius] + llength = sqrt((bbox[2] - bbox[0]) ** 2 + (bbox[3] - bbox[1]) ** 2) + return llength / 3 + +def build_camera_box(rear_size=90): + point_3d = [] + rear_depth = 0 + point_3d.append((-rear_size, -rear_size, rear_depth)) + point_3d.append((-rear_size, rear_size, rear_depth)) + point_3d.append((rear_size, rear_size, rear_depth)) + point_3d.append((rear_size, -rear_size, rear_depth)) + point_3d.append((-rear_size, -rear_size, rear_depth)) + + front_size = int(4 / 3 * rear_size) + front_depth = int(4 / 3 * rear_size) + point_3d.append((-front_size, -front_size, front_depth)) + point_3d.append((-front_size, front_size, front_depth)) + point_3d.append((front_size, front_size, front_depth)) + point_3d.append((front_size, -front_size, front_depth)) + point_3d.append((-front_size, -front_size, front_depth)) + point_3d = np.array(point_3d, dtype=np.float).reshape(-1, 3) + + return point_3d + + +def plot_pose_box(im, Ps, pts68s, color=(40, 255, 0), line_width=2): + '''Draw a 3D box as annotation of pose. + ref: https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py + :param image: the input image + :param P: (3, 4). Affine Camera Matrix. 
+ :param kpts: (2, 68) or (3, 68) + ''' + im_draw = im.copy() + if not isinstance(pts68s, list): + pts68s = [pts68s] + + if not isinstance(Ps, list): + Ps = [Ps] + + for i in range(len(pts68s)): + pts68 = pts68s[i] + llength = calc_hypotenuse(pts68) + point_3d = build_camera_box(llength) + P = Ps[i] + + # Map to 2d im points + point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) # n x 4 + point_2d = point_3d_homo.dot(P.T)[:, :2] + + point_2d[:, 1] = - point_2d[:, 1] + point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(pts68[:2, :27], 1) + point_2d = np.int32(point_2d.reshape(-1, 2)) + + # Draw all the lines + cv.polylines(im_draw, [point_2d], True, color, line_width, cv.LINE_AA) + cv.line(im_draw, tuple(point_2d[1]), tuple(point_2d[6]), color, line_width, cv.LINE_AA) + cv.line(im_draw, tuple(point_2d[2]), tuple(point_2d[7]), color, line_width, cv.LINE_AA) + cv.line(im_draw, tuple(point_2d[3]), tuple(point_2d[8]), color, line_width, cv.LINE_AA) + + return im_draw + + + +# --------------------------------------------------------------------------- +# +# OpenCV drawing functions +# +# --------------------------------------------------------------------------- + +pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)} + +def draw_landmarks2d_cv(im, points_norm, radius=3, color=(0,255,0)): + '''Draws facial landmarks, either 5pt or 68pt + ''' + im_dst = im.copy() + dim = im.shape[:2][::-1] + for x,y in points_norm: + pt = (int(x*dim[0]), int(y*dim[1])) + cv.circle(im_dst, pt, radius, color, -1, cv.LINE_AA) + return im_dst + +def draw_landmarks2d_pil(im_pil, points_norm, radius=3, color=(0,255,0)): + '''Draws facial landmarks, either 5pt or 68pt + ''' + assert im_utils.is_pil(im_pil) + draw = ImageDraw.Draw(im_pil) + dim = im.shape[:2][::-1] + for x,y in points_norm: + x1, y1 = (int(x*dim[0]), int(y*dim[1])) + xyxy = (x1, y1, x1+radius, y1+radius) + draw.ellipse(xyxy, fill='white') + del draw + im_dst = im_utils.ensure_np(im_pil) + im_dst = im_utils.rgb2bgr(im_dst) + return im_dst + + +def draw_landmarks3D_cv(im, points, radius=3, color=(0,255,0)): + '''Draws 3D facial landmarks + ''' + im_dst = im.copy() + for x,y,z in points: + cv.circle(im_dst, (x,y), radius, color, -1, cv.LINE_AA) + return im_dst + + +def draw_bbox_cv(im_np, bbox_norm, color=(0,255,0), stroke_weight=2): + '''Draws BBox onto cv image + ''' + bbox_dim = bbox_norm.to_bbox_dim(im_np.shape[:2][::-1]) + return cv.rectangle(im_np, bbox_dim.p1.xy, bbox_dim.p2.xy, color, stroke_weight, cv.LINE_AA) + + +def draw_bbox_pil(im, bboxes_norm, color=(0,255,0), stroke_weight=2): + '''Draws BBox onto cv image + :param color: RGB value + ''' + if im_utils.is_np(im): + im = im_utils.np2pil(im) + was_np = True + else: + was_np = False + + if not type(bboxes_norm) == list: + bboxes_norm = [bboxes_norm] + + + im_draw = ImageDraw.ImageDraw(im) + + for bbox_norm in bboxes_norm: + bbox_dim = bbox_norm.to_bbox_dim(im.size) + xyxy = (bbox_dim.p1.xy, bbox_dim.p2.xy) + im_draw.rectangle(xyxy, outline=color, width=stroke_weight) + del im_draw + + if was_np: + im = im_utils.pil2np(im) + return im + + +def draw_pose(im, pt_nose, image_pts): + '''Draws 3-axis pose over image + TODO: normalize point data + ''' + im_dst = im.copy() + log.debug(f'pt_nose: {pt_nose}') + log.debug(f'image_pts pitch: {image_pts["pitch"]}') + cv.line(im_dst, pt_nose, tuple(image_pts['pitch']), pose_types['pitch'], 3) + cv.line(im_dst, pt_nose, tuple(image_pts['yaw']), pose_types['yaw'], 3) + cv.line(im_dst, pt_nose, 
tuple(image_pts['roll']), pose_types['roll'], 3) + return im_dst + +def draw_text_cv(im, pt_norm, text, size=1.0, color=(0,255,0)): + '''Draws degrees as text over image + ''' + im_dst = im.copy() + dim = im.shape[:2][::-1] + pt = tuple(map(int, (pt_norm[0]*dim[0], pt_norm[1]*dim[1]))) + cv.putText(im_dst, text, pt, cv.FONT_HERSHEY_SIMPLEX, size, color, thickness=1, lineType=cv.LINE_AA) + return im_dst + + +def draw_degrees(im, pose_data, color=(0,255,0)): + '''Draws degrees as text over image + ''' + im_dst = im.copy() + for i, pose_type in enumerate(pose_types.items()): + k, clr = pose_type + v = pose_data[k] + t = '{}: {:.2f}'.format(k, v) + origin = (10, 30 + (25 * i)) + cv.putText(im_dst, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2) + return im_dst \ No newline at end of file diff --git a/cli/app/utils/file_utils.py b/cli/app/utils/file_utils.py new file mode 100644 index 0000000..5feab32 --- /dev/null +++ b/cli/app/utils/file_utils.py @@ -0,0 +1,461 @@ +""" +File utilities +""" +import sys +import os +from os.path import join +import stat + +from glob import glob +from pprint import pprint +import shutil +import distutils +import pathlib +from pathlib import Path +import json +import csv +import pickle +import threading +from queue import Queue +import time +import logging +import itertools +import collections + +import hashlib +import pymediainfo +import click +from tqdm import tqdm +import cv2 as cv +from PIL import Image +import imutils + +from app.settings import app_cfg as cfg +from app.settings import types + +log = logging.getLogger(cfg.LOGGER_NAME) + + +# ------------------------------------------ +# File I/O read/write little helpers +# ------------------------------------------ + +def glob_multi(dir_in, exts=['jpg'], recursive=False): + files = [] + for ext in exts: + if recursive: + fp_glob = join(dir_in, '**/*.{}'.format(ext)) + log.info(f'glob {fp_glob}') + files += glob(fp_glob, recursive=True) + else: + fp_glob = join(dir_in, '*.{}'.format(ext)) + files += glob(fp_glob) + return files + + +def zpad(x, zeros=cfg.ZERO_PADDING): + return str(x).zfill(zeros) + +def get_ext(fpp, lower=True): + """Retuns the file extension w/o dot + :param fpp: (Pathlib.path) filepath + :param lower: (bool) force lowercase + :returns: (str) file extension (ie 'jpg') + """ + fpp = ensure_posixpath(fpp) + ext = fpp.suffix.replace('.', '') + return ext.lower() if lower else ext + + +def convert(fp_in, fp_out): + """Converts between JSON and Pickle formats + Pickle files are about 30-40% smaller filesize + """ + if get_ext(fp_in) == get_ext(fp_out): + log.error('Input: {} and output: {} are the same. 
Use this to convert.') + + lazywrite(lazyload(fp_in), fp_out) + + +def load_csv(fp_in, as_list=True): + """Loads CSV and retuns list of items + :param fp_in: string filepath to CSV + :returns: list of all CSV data + """ + if not Path(fp_in).exists(): + log.info('not found: {}'.format(fp_in)) + log.info('loading: {}'.format(fp_in)) + with open(fp_in, 'r') as fp: + items = csv.DictReader(fp) + if as_list: + items = [x for x in items] + log.info('returning {:,} items'.format(len(items))) + return items + +def unfussy_csv_reader(reader): + """Loads a CSV while ignoring possible data errors + :param reader: Special reader for load_csv_safe which ignores CSV parse errors + """ + while True: + try: + yield next(reader) + except StopIteration: + return + except csv.Error: + print(csv.Error) + # log the problem or whatever + continue + +def load_csv_safe(fp_in, keys=True, create=False): + """Loads a CSV while ignoring possible data errors + :param fp_in: string filepath to JSON file + :param keys: boolean set to false if the first line is not headers (for some reason) + :param create: boolean set to true to return an empty keys/values if the CSV does not exist + """ + try: + with open(fp_in, 'r', newline='', encoding='utf-8') as f: + # reader = csv.reader( (line.replace('\0','') for line in f) ) + reader = csv.reader(f) + lines = list(unfussy_csv_reader(reader)) + if keys: + keys = lines[0] + lines = lines[1:] + return keys, lines + return lines + except: + if create: + if keys: + return {}, [] + return [] + raise + +def load_recipe(fp_in): + """Loads a JSON file as an object with properties accessible with dot syntax + :param fp_in: string filepath to JSON file + """ + with open(path) as fh: + return json.load(fh, object_hook=lambda d: collections.namedtuple('X', d.keys())(*d.values())) + + +def lazywrite(data, fp_out, sort_keys=True): + """Writes JSON or Pickle data""" + ext = get_ext(fp_out) + if ext == 'json': + return write_json(data, fp_out, sort_keys=sort_keys) + elif ext == 'pkl': + return write_pickle(data, fp_out) + else: + raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext)) + + +def lazyload(fp_in, ordered=True): + """Loads JSON or Pickle serialized data""" + if not Path(fp_in).exists(): + log.error('file does not exist: {}'.format(fp_in)) + return {} + ext = get_ext(fp_in) + if ext == 'json': + items = load_json(fp_in) + elif ext == 'pkl': + items = load_pickle(fp_in) + else: + raise NotImplementedError('[!] {} is not yet supported. 
Use .pkl or .json'.format(ext)) + + if ordered: + return collections.OrderedDict(sorted(items.items(), key=lambda t: t[0])) + else: + return items + + +def load_text(fp_in): + with open(fp_in, 'rt') as fp: + lines = fp.read().rstrip('\n').split('\n') + return lines + +def load_json(fp_in): + """Loads JSON and returns items + :param fp_in: (str) filepath + :returns: data from JSON + """ + if not Path(fp_in).exists(): + log.error('file does not exist: {}'.format(fp_in)) + return {} + with open(str(fp_in), 'r') as fp: + data = json.load(fp) + return data + + +def load_pickle(fp_in): + """Loads Pickle and returns items + :param fp_in: (str) filepath + :returns: data from JSON + """ + if not Path(fp_in).exists(): + log.error('file does not exist: {}'.format(fp_in)) + return {} + with open(str(fp_in), 'rb') as fp: + data = pickle.load(fp) + return data + + +def order_items(records): + """Orders records by ASC SHA256""" + return collections.OrderedDict(sorted(records.items(), key=lambda t: t[0])) + +def write_text(data, fp_out, ensure_path=True): + if not data: + log.error('no data') + return + + if ensure_path: + mkdirs(fp_out) + with open(fp_out, 'w') as fp: + if type(data) == list: + fp.write('\n'.join(data)) + else: + fp.write(data) + + +def write_pickle(data, fp_out, ensure_path=True): + """ + """ + if ensure_path: + mkdirs(fp_out) # mkdir + with open(fp_out, 'wb') as fp: + pickle.dump(data, fp) + + +def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True, verbose=False): + """ + """ + if ensure_path: + mkdirs(fp_out) + with open(fp_out, 'w') as fp: + if minify: + json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys) + else: + json.dump(data, fp, indent=2, sort_keys=sort_keys) + if verbose: + log.info('Wrote JSON: {}'.format(fp_out)) + +def write_csv(data, fp_out, header=None): + """Write a CSV of key-value pairs""" + with open(fp_out, 'w') as fp: + writer = csv.DictWriter(fp, fieldnames=header) + writer.writeheader() + if type(data) is dict: + for k, v in data.items(): + fp.writerow('{},{}'.format(k, v)) + +def write_csv_table(fn, keys, rows): + """Write a CSV of columns and rows""" + with open(fn, 'w', encoding='utf-8') as f: + writer = csv.writer(f) + if keys is not None: + writer.writerow(keys) + for row in rows: + writer.writerow(row) + + +def write_serialized_items(items, fp_out, ensure_path=True, minify=True, sort_keys=True): + """Writes serialized data + :param items: (dict) a sha256 dict of MappingItems + :param serialize: (bool) serialize the data + :param ensure_path: ensure the parent directories exist + :param minify: reduces JSON file size + """ + log.info('Writing serialized data...') + fpp_out = ensure_posixpath(fp_out) + serialized_items = {k: v.serialize() for k, v in tqdm(items.items()) } + # write data + ext = get_ext(fpp_out) + if ext == 'json': + write_json(serialized_items, fp_out, ensure_path=ensure_path, minify=minify, sort_keys=sort_keys) + elif ext == 'pkl': + write_pickle(serialized_items, fp_out) + else: + raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext)) + log.info('Wrote {:,} items to {}'.format(len(items), fp_out)) + + +def write_modeled_data(data, fp_out, ensure_path=False): + """ + """ + fpp_out = ensure_posixpath(fp_out) + if ensure_path: + mkdirs(fpp_out) + ext = get_ext(fpp_out) + if ext == 'pkl': + write_pickle(data, str(fp_out)) + else: + raise NotImplementedError('[!] {} is not yet supported. 
Use .pkl or .json'.format(ext)) + + +# --------------------------------------------------------------------- +# Filepath utilities +# --------------------------------------------------------------------- + +def ensure_posixpath(fp): + """Ensures filepath is pathlib.Path + :param fp: a (str, LazyFile, PosixPath) + :returns: a PosixPath filepath object + """ + if type(fp) == str: + fpp = Path(fp) + elif type(fp) == click.utils.LazyFile: + fpp = Path(fp.name) + elif type(fp) == pathlib.PosixPath: + fpp = fp + else: + raise TypeError('{} is not a valid filepath type'.format(type(fp))) + return fpp + + +def mkdirs(fp): + """Ensure parent directories exist for a filepath + :param fp: string, Path, or click.File + """ + fpp = ensure_posixpath(fp) + fpp = fpp.parent if fpp.suffix else fpp + fpp.mkdir(parents=True, exist_ok=True) + + +def ext_media_format(ext): + """Converts file extension into Enum MediaType + param ext: str of file extension" + """ + for media_format, exts in cfg.VALID_MEDIA_EXTS.items(): + if ext in exts: + return media_format + raise ValueError('{} is not a valid option'.format(ext)) + + +def sha256(fp_in, block_size=65536): + """Generates SHA256 hash for a file + :param fp_in: (str) filepath + :param block_size: (int) byte size of block + :returns: (str) hash + """ + sha256 = hashlib.sha256() + with open(fp_in, 'rb') as fp: + for block in iter(lambda: fp.read(block_size), b''): + sha256.update(block) + return sha256.hexdigest() + + +def sha256_tree(sha256): + """Split hash into branches with tree-depth for faster file indexing + :param sha256: str of a sha256 hash + :returns: str with sha256 tree with '/' delimeter + """ + branch_size = cfg.HASH_BRANCH_SIZE + tree_size = cfg.HASH_TREE_DEPTH * branch_size + sha256_tree = [sha256[i:(i+branch_size)] for i in range(0, tree_size, branch_size)] + return '/'.join(sha256_tree) + + +def migrate(fmaps, threads=1, action='copy', force=False): + """Copy/move/symlink files form src to dst directory + :param fmaps: (dict) with 'src' and 'dst' filepaths + :param threads: (int) number of threads + :param action: (str) copy/move/symlink + :param force: (bool) force overwrite existing files + """ + log = log + num_items = len(fmaps) + + def copytree(src, dst, symlinks = False, ignore = None): + # ozxyqk: https://stackoverflow.com/questions/22588225/how-do-you-merge-two-directories-or-move-with-replace-from-the-windows-command + if not os.path.exists(dst): + mkdirs(dst) + # os.makedirs(dst) + shutil.copystat(src, dst) + lst = os.listdir(src) + if ignore: + excl = ignore(src, lst) + lst = [x for x in lst if x not in excl] + for item in lst: + s = os.path.join(src, item) + d = os.path.join(dst, item) + if symlinks and os.path.islink(s): + if os.path.exists(d): + os.remove(d) + os.symlink(os.readlink(s), d) + try: + st = os.lstat(s) + mode = stat.S_IMODE(st.st_mode) + os.lchmod(d, mode) + except: + pass # lchmod not available + elif os.path.isdir(s): + copytree(s, d, symlinks, ignore) + else: + shutil.copy(s, d) + + assert(action in ['copy','move','symlink']) + + if threads > 1: + # threaded + task_queue = Queue() + print_lock = threading.Lock() + + def migrate_action(fmap): + data_local = threading.local() + data_local.src, data_local.dst = (fmap['src'], fmap['dst']) + data_local.src_path = Path(data_local.src) + data_local.dst_path = Path(data_local.dst) + + if force or not data_local.dst_path.exists(): + if action == 'copy': + shutil.copy(data_local.src, data_local.dst) + #if data_local.src_path.is_dir(): + # copytree(data_local.src, 
data_local.dst) + #else: + elif action == 'move': + shutil.move(data_local.src, data_local.dst) + elif action == 'symlink': + if force: + data_local.dst_path.unlink() + Path(data_local.src).symlink_to(data_local.dst) + + def process_queue(num_items): + # TODO: progress bar + while True: + fmap = task_queue.get() + migrate_action(fmap) + log.info('migrate: {:.2f} {:,}/{:,}'.format( + (task_queue.qsize() / num_items)*100, task_queue.qsize(), num_items)) + task_queue.task_done() + + # avoid race conditions by creating dir structure here + log.info('create directory structure') + for fmap in tqdm(fmaps): + mkdirs(fmap['dst']) + + # init threads + for i in range(threads): + t = threading.Thread(target=process_queue, args=(num_items,)) + t.daemon = True + t.start() + + # process threads + start = time.time() + for fmap in fmaps: + task_queue.put(fmap) + + task_queue.join() + + else: + # non-threaded + for fmap in tqdm(fmaps): + mkdirs(fmap['dst']) + if action == 'copy': + shutil.copy(fmap['src'], fmap['dst']) + elif action == 'move': + shutil.move(fmap['src'], fmap['dst']) + elif action == 'symlink': + if force: + Path(fmap['dst'].unlink()) + Path(fp_src).symlink_to(fp_dst) + return + diff --git a/cli/app/utils/identity_utils.py b/cli/app/utils/identity_utils.py new file mode 100644 index 0000000..5855fbb --- /dev/null +++ b/cli/app/utils/identity_utils.py @@ -0,0 +1,161 @@ +import os +from pathlib import Path +from glob import glob +import unidecode +import difflib + +from app.settings import types +from app.models.data_store import DataStore +from app.utils import logger_utils + +log = logger_utils.Logger.getLogger() + +az = 'abcdefghijklmlopqrstuvwzxyz' +AZ = az.upper() +z9 = list(map(str, list(range(0,10)))) +aZ9 = list(az) + list(AZ) + z9 + +def letter_strip(a, b=aZ9): + # strip every letter from a that is not in b + return ''.join([x for x in a if x in b]) + +def letter_match(a, b): + # check if every letter (a-zA-Z0-9) exists in both + return sum([x in b for x in a]) == len(a) + +def names_match_strict(a, b): + clean_a = letter_strip(a) + clean_b = letter_strip(b) + return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a) + + +def sanitize_name(name, as_str=False): + splits = [unidecode.unidecode(x.strip().lower()) for x in name.strip().split(' ')] + if as_str: + return ' '.join(splits) + else: + return splits + +''' +class Dataset(Enum): + LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \ + CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI = range(16) +''' +# Get list of names based on Dataset type +def get_names(opt_dataset, opt_data_store=types.DataStore.HDD): + data_store = DataStore(opt_data_store, opt_dataset) + dir_dataset = data_store.dir_dataset # path to dataset root + dir_media_orig = data_store.dir_media_original + if opt_dataset == types.Dataset.AFW: + # Annotated Faces in the Wild + pass + elif opt_dataset == types.Dataset.BRAINWASH: + # Brainwash IP Cam dataset + pass + elif opt_dataset == types.Dataset.CASIA_WEBFACE: + # + pass + elif opt_dataset == types.Dataset.HELEN: + # Helen + pass + elif opt_dataset == types.Dataset.IMDB_WIKI: + # University of Tennessee Knoxville + pass + elif opt_dataset == types.Dataset.LAG: + # Large Age Gap + pass + elif opt_dataset == types.Dataset.LFW: + # Labeled Faces in The Wild + names_orig = [x for x in os.listdir(dir_media_orig)] + names_query = [x.replace('_', ' ') for x in names_orig] + elif opt_dataset == types.Dataset.MEGAFACE: + # 
MegaFace + pass + elif opt_dataset == types.Dataset.MSCELEB: + # MS Celeb + pass + elif opt_dataset == types.Dataset.PIPA: + # People in Photo Albums + pass + elif opt_dataset == types.Dataset.PUBFIG83: + # PubFig83 + names_orig = [x for x in os.listdir(dir_media_orig) if Path(x).suffix is not '.txt'] + names_query = [x.replace('_', ' ') for x in names_orig] + elif opt_dataset == types.Dataset.SCUT_FBP: + # SCUT Facial Beauty Perception + pass + elif opt_dataset == types.Dataset.UCCS: + # Unconstrianed College Students + pass + elif opt_dataset == types.Dataset.UMD_FACES: + # University of Maryland Faces + pass + elif opt_dataset == types.Dataset.UTK: + # University of Tennessee Knoxville + pass + elif opt_dataset == types.Dataset.UCF_SELFIE: + # University of Central Florida Selfie + pass + elif opt_dataset == types.Dataset.VGG_FACE: + # Visual Geometry Group Face 1 + pass + elif opt_dataset == types.Dataset.VGG_FACE2: + # Visual Geometry Group Face 2 + pass + else: + log.warn(f'{opt_dataset} not yet implemented') + names_orig = [] + names_query = [] + result = {'names_orig': names_orig, 'names_query': names_query} + return result + + +def similarity(a, b): + return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio() + +def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False, name_a_pre=False, name_b_pre=False): + '''Returns boolean if names are similar enough + ''' + # strip spaces and split names into list of plain text words + if name_a_pre: + name_a_clean = name_a + else: + name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')] + if name_b_pre: + name_b_clean = name_b + else: + name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')] + + # assign short long vars + len_a = len(name_a_clean) + len_b = len(name_b_clean) + len_min = min(len_a, len_b) + len_max = max(len_a, len_b) + + # compute scores + scores = [] + for i in range(len(name_a_clean)): + word_a = name_a_clean[i] + subscores = [] + for j in range(len(name_b_clean)): + word_b = name_b_clean[j] + score = similarity(word_a, word_b) + subscores.append(score) + scores.append(subscores) + + # return result + ratio_similar = sum(max(x) for x in scores) / len(scores) + + if compound_score: + # combine with any missing letters/words + letters_a = sum(len(x) for x in name_a_clean) + letters_b = sum(len(x) for x in name_b_clean) + ratio_letters = min(letters_a, letters_b) / max(letters_a, letters_b) + score = (0.8 * ratio_similar) + (0.2 * ratio_letters) + else: + score = ratio_similar + + if as_float: + return score + else: + return score > threshold \ No newline at end of file diff --git a/cli/app/utils/im_utils.py b/cli/app/utils/im_utils.py new file mode 100644 index 0000000..ab6467e --- /dev/null +++ b/cli/app/utils/im_utils.py @@ -0,0 +1,556 @@ +import sys +import os +from os.path import join +import cv2 as cv +import imagehash +from PIL import Image, ImageDraw, ImageFilter, ImageOps +from skimage.filters.rank import entropy +from skimage.morphology import disk +from skimage import feature +# import matplotlib.pyplot as plt +import imutils +import time +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity +import datetime + +def ensure_pil(im): + """Ensure image is Pillow format""" + try: + im.verify() + return im + except: + return Image.fromarray(im.astype('uint8'), 'RGB') + +def ensure_np(im): + """Ensure image is numpy array""" + if type(im) == np.ndarray: + return im + return np.asarray(im, np.uint8) 
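+# Example (illustrative sketch): converting between the OpenCV (numpy, BGR) and
+# Pillow (RGB) image types used throughout this module. 'face.jpg' is a
+# placeholder path; np2pil/pil2np are defined further below.
+#
+#   im_bgr = cv.imread('face.jpg')     # numpy.ndarray, BGR channel order
+#   im_pil = np2pil(im_bgr)            # PIL.Image, RGB
+#   im_bgr2 = pil2np(im_pil)           # back to numpy.ndarray, BGR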
+ +def num_channels(im): + '''Returns number of channels in numpy.ndarray image''' + if len(im.shape) > 2: + return im.shape[2] + else: + return 1 + +def is_grayscale(im, threshold=5): + """Returns True if image is grayscale + :param im: (numpy.array) image + :return (bool) of if image is grayscale""" + b = im[:,:,0] + g = im[:,:,1] + mean = np.mean(np.abs(g - b)) + return mean < threshold + + +def compute_features(fe,frames,phashes,phash_thresh=1): + """ + Get vector embedding using FeatureExtractor + :param fe: FeatureExtractor class + :param frames: list of frame images as numpy.ndarray + :param phash_thresh: perceptual hash threshold + :returns: list of feature vectors + """ + vals = [] + phash_pre = phashes[0] + for i,im in enumerate(frames): + if i == 0 or (phashes[i] - phashes[i-1]) > phash_thresh: + vals.append(fe.extract(im)) + else: + vals.append(vals[i-1]) + return vals + + +def np2pil(im, swap=True): + """Ensure image is Pillow format + :param im: image in numpy or PIL.Image format + :returns: image in Pillow RGB format + """ + try: + im.verify() + return im + except: + if swap: + im = cv.cvtColor(im,cv.COLOR_BGR2RGB) + return Image.fromarray(im.astype('uint8'), 'RGB') + +def pil2np(im, swap=True): + """Ensure image is Numpy.ndarry format + :param im: image in numpy or PIL.Image format + :returns: image in Numpy uint8 format + """ + if type(im) == np.ndarray: + return im + im = np.asarray(im, np.uint8) + if swap: + im = cv.cvtColor(im, cv.COLOR_RGB2BGR) + return im + + +def resize(im, width=0, height=0): + """resize image using imutils. Use w/h=[0 || None] to prioritize other edge size + :param im: a Numpy.ndarray image + :param wh: a tuple of (width, height) + """ + # TODO change to cv.resize and add algorithm choices + w = width + h = height + if w is 0 and h is 0: + return im + elif w > 0 and h > 0: + ws = im.shape[1] / w + hs = im.shape[0] / h + if ws > hs: + return imutils.resize(im, width=w) + else: + return imutils.resize(im, height=h) + elif w > 0 and h is 0: + return imutils.resize(im, width=w) + elif w is 0 and h > 0: + return imutils.resize(im, height=h) + else: + return im + +def filter_pixellate(im,num_cells): + """Pixellate image by downsample then upsample + :param im: PIL.Image + :returns: PIL.Image + """ + w,h = im.size + im = im.resize((num_cells,num_cells), Image.NEAREST) + im = im.resize((w,h), Image.NEAREST) + return im + +# Plot images inline using Matplotlib +# def pltimg(im,title=None,mode='rgb',figsize=(8,12),dpi=160,output=None): +# plt.figure(figsize=figsize) +# plt.xticks([]),plt.yticks([]) +# if title is not None: +# plt.title(title) +# if mode.lower() == 'bgr': +# im = cv.cvtColor(im,cv.COLOR_BGR2RGB) + +# f = plt.gcf() +# if mode.lower() =='grey' or mode.lower() == 'gray': +# plt.imshow(im,cmap='gray') +# else: +# plt.imshow(im) +# plt.show() +# plt.draw() +# if output is not None: +# bbox_inches='tight' +# ext=osp.splitext(output)[1].replace('.','') +# f.savefig(output,dpi=dpi,format=ext) +# print('Image saved to: {}'.format(output)) + + + +# Utilities for analyzing frames + +def compute_gray(im): + im = cv.cvtColor(im,cv.COLOR_BGR2GRAY) + n_vals = float(im.shape[0] * im.shape[1]) + avg = np.sum(im[:]) / n_vals + return avg + +def compute_rgb(im): + im = cv.cvtColor(im,cv.COLOR_BGR2RGB) + n_vals = float(im.shape[0] * im.shape[1]) + avg_r = np.sum(im[:,:,0]) / n_vals + avg_g = np.sum(im[:,:,1]) / n_vals + avg_b = np.sum(im[:,:,2]) / n_vals + avg_rgb = np.sum(im[:,:,:]) / (n_vals * 3.0) + return avg_r, avg_b, avg_g, avg_rgb + +def compute_hsv(im): 
+ im = cv.cvtColor(im,cv.COLOR_BGR2HSV) + n_vals = float(im.shape[0] * im.shape[1]) + avg_h = np.sum(frame[:,:,0]) / n_vals + avg_s = np.sum(frame[:,:,1]) / n_vals + avg_v = np.sum(frame[:,:,2]) / n_vals + avg_hsv = np.sum(frame[:,:,:]) / (n_vals * 3.0) + return avg_h, avg_s, avg_v, avg_hsv + +def pys_dhash(im, hashSize=8): + # resize the input image, adding a single column (width) so we + # can compute the horizontal gradient + resized = cv.resize(im, (hashSize + 1, hashSize)) + # compute the (relative) horizontal gradient between adjacent + # column pixels + diff = resized[:, 1:] > resized[:, :-1] + # convert the difference image to a hash + return sum([2 ** i for (i, v) in enumerate(diff.flatten()) if v]) + + +############################################ +# ImageHash +# pip install imagehash +############################################ + + +def compute_ahash(im): + """Compute average hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.average_hash(ensure_pil(im_pil)) + +def compute_phash(im): + """Compute perceptual hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.phash(ensure_pil(im)) + +def compute_dhash(im): + """Compute difference hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.dhash(ensure_pil(im)) + +def compute_whash(im): + """Compute wavelet hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.whash(ensure_pil(im)) + +def compute_whash_b64(im): + """Compute wavelest hash base64 using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return lambda im: imagehash.whash(ensure_pil(im), mode='db4') + + +############################################ +# Pillow +############################################ + +def sharpen(im): + """Sharpen image using PIL.ImageFilter + param: im: PIL.Image + returns: PIL.Image + """ + im = ensure_pil(im) + im.filter(ImageFilter.SHARPEN) + return ensure_np(im) + +def fit_image(im,targ_size): + """Force fit image by cropping + param: im: PIL.Image + param: targ_size: a tuple of target (width, height) + returns: PIL.Image + """ + im_pil = ensure_pil(im) + frame_pil = ImageOps.fit(im_pil, targ_size, + method=Image.BICUBIC, centering=(0.5, 0.5)) + return ensure_np(frame_pil) + + +def compute_entropy(im): + entr_img = entropy(im, disk(10)) + + +############################################ +# scikit-learn +############################################ + +def compute_entropy(im): + # im is grayscale numpy + return entropy(im, disk(10)) + +############################################ +# OpenCV +############################################ + +def bgr2gray(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (BGR) + :returns: Numpy.ndarray (Gray) + """ + return cv.cvtColor(im,cv.COLOR_BGR2GRAY) + +def gray2bgr(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (Gray) + :returns: Numpy.ndarray (BGR) + """ + return cv.cvtColor(im,cv.COLOR_GRAY2BGR) + +def bgr2rgb(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (BGR) + :returns: Numpy.ndarray (RGB) + """ + return cv.cvtColor(im,cv.COLOR_BGR2RGB) + +def rgb2bgr(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (BGR) + :returns: Numpy.ndarray (RGB) + """ + return cv.cvtColor(im,cv.COLOR_RGB2BGR) + +def compute_laplacian(im): + # below 100 is usually blurry 
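+  # Example (illustrative) blur check built on this score; the 100 cutoff is
+  # only a rough heuristic, as noted above:
+  #
+  #   gray = bgr2gray(im)
+  #   if compute_laplacian(gray) < 100:
+  #       print('frame looks blurry')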
+ return cv.Laplacian(im, cv.CV_64F).var() + + +# http://radjkarl.github.io/imgProcessor/index.html# + +def modifiedLaplacian(img): + ''''LAPM' algorithm (Nayar89)''' + M = np.array([-1, 2, -1]) + G = cv.getGaussianKernel(ksize=3, sigma=-1) + Lx = cv.sepFilter2D(src=img, ddepth=cv.CV_64F, kernelX=M, kernelY=G) + Ly = cv.sepFilter2D(src=img, ddepth=cv.CV_64F, kernelX=G, kernelY=M) + FM = np.abs(Lx) + np.abs(Ly) + return cv.mean(FM)[0] + +def varianceOfLaplacian(img): + ''''LAPV' algorithm (Pech2000)''' + lap = cv.Laplacian(img, ddepth=-1)#cv.cv.CV_64F) + stdev = cv.meanStdDev(lap)[1] + s = stdev[0]**2 + return s[0] + +def tenengrad(img, ksize=3): + ''''TENG' algorithm (Krotkov86)''' + Gx = cv.Sobel(img, ddepth=cv.CV_64F, dx=1, dy=0, ksize=ksize) + Gy = cv.Sobel(img, ddepth=cv.CV_64F, dx=0, dy=1, ksize=ksize) + FM = Gx**2 + Gy**2 + return cv.mean(FM)[0] + +def normalizedGraylevelVariance(img): + ''''GLVN' algorithm (Santos97)''' + mean, stdev = cv.meanStdDev(img) + s = stdev[0]**2 / mean[0] + return s[0] + +def is_blank(im,width=100,sigma=0,thresh_canny=.1,thresh_mean=4,mask=None): + # im is graysacale np + #im = imutils.resize(im,width=width) + #mask = imutils.resize(mask,width=width) + if mask is not None: + im_canny = feature.canny(im,sigma=sigma,mask=mask) + total = len(np.where(mask > 0)[0]) + else: + im_canny = feature.canny(im,sigma=sigma) + total = (im.shape[0]*im.shape[1]) + n_white = len(np.where(im_canny > 0)[0]) + per = n_white/total + if np.mean(im) < thresh_mean or per < thresh_canny: + return 1 + else: + return 0 + + +def print_timing(t,n): + t = time.time()-t + print('Elapsed time: {:.2f}'.format(t)) + print('FPS: {:.2f}'.format(n/t)) + +def vid2frames(fpath, limit=5000, width=None, idxs=None): + """Convert a video file into list of frames + :param fpath: filepath to the video file + :param limit: maximum number of frames to read + :param fpath: the indices of frames to keep (rest are skipped) + :returns: (fps, number of frames, list of Numpy.ndarray frames) + """ + frames = [] + try: + cap = cv.VideoCapture(fpath) + except: + print('[-] Error. 
Could not read video file: {}'.format(fpath)) + try: + cap.release() + except: + pass + return frames + + fps = cap.get(cv.CAP_PROP_FPS) + nframes = int(cap.get(cv.CAP_PROP_FRAME_COUNT)) + + if idxs is not None: + # read sample indices by seeking to frame index + for idx in idxs: + cap.set(cv.CAP_PROP_POS_FRAMES, idx) + res, frame = cap.read() + if width is not None: + frame = imutils.resize(frame, width=width) + frames.append(frame) + else: + while(True and len(frames) < limit): + res, frame = cap.read() + if not res: + break + if width is not None: + frame = imutils.resize(frame, width=width) + frames.append(frame) + + cap.release() + del cap + #return fps,nframes,frames + return frames + +def convolve_filter(vals,filters=[1]): + for k in filters: + vals_tmp = np.zeros_like(vals) + t = len(vals_tmp) + for i,v in enumerate(vals): + sum_vals = vals[max(0,i-k):min(t-1,i+k)] + vals_tmp[i] = np.mean(sum_vals) + vals = vals_tmp.copy() + return vals + +def cosine_delta(v1,v2): + return 1.0 - cosine_similarity(v1.reshape((1, -1)), v2.reshape((1, -1)))[0][0] + + + +def compute_edges(vals): + # find edges (1 = rising, -1 = falling) + edges = np.zeros_like(vals) + for i in range(len(vals[1:])): + delta = vals[i] - vals[i-1] + if delta == -1: + edges[i] = 1 # rising edge 0 --> 1 + elif delta == 1: + edges[i+1] = 2 # falling edge 1 --> 0 + # get index for rise fall + rising = np.where(np.array(edges) == 1)[0] + falling = np.where(np.array(edges) == 2)[0] + return rising, falling + + +############################################ +# Point, Rect +############################################ + +class Point(object): + def __init__(self, x, y): + self.x = x + self.y = y + +class Rect(object): + def __init__(self, p1, p2): + '''Store the top, bottom, left and right values for points + p1 and p2 are the (corners) in either order + ''' + self.left = min(p1.x, p2.x) + self.right = max(p1.x, p2.x) + self.top = min(p1.y, p2.y) + self.bottom = max(p1.y, p2.y) + +def overlap(r1, r2): + '''Overlapping rectangles overlap both horizontally & vertically + ''' + return range_overlap(r1.left, r1.right, r2.left, r2.right) and \ + range_overlap(r1.top, r1.bottom, r2.top, r2.bottom) + +def range_overlap(a_min, a_max, b_min, b_max): + '''Neither range is completely greater than the other + ''' + return (a_min <= b_max) and (b_min <= a_max) + +def merge_rects(r1,r2): + p1 = Point(min(r1.left,r2.left),min(r1.top,r2.top)) + p2 = Point(max(r1.right,r2.right),max(r1.bottom,r2.bottom)) + return Rect(p1,p2) + +def is_overlapping(r1,r2): + """r1,r2 as [x1,y1,x2,y2] list""" + r1x = Rect(Point(r1[0],r1[1]),Point(r1[2],r1[3])) + r2x = Rect(Point(r2[0],r2[1]),Point(r2[2],r2[3])) + return overlap(r1x,r2x) + +def get_rects_merged(rects,bounds,expand=0): + """rects: list of points in [x1,y1,x2,y2] format""" + rects_expanded = [] + bx,by = bounds + # expand + for x1,y1,x2,y2 in rects: + x1 = max(0,x1-expand) + y1 = max(0,y1-expand) + x2 = min(bx,x2+expand) + y2 = min(by,y2+expand) + rects_expanded.append(Rect(Point(x1,y1),Point(x2,y2))) + + #rects_expanded = [Rect(Point(x1,y1),Point(x2,y2)) for x1,y1,x2,y2 in rects_expanded] + rects_merged = [] + for i,r in enumerate(rects_expanded): + found = False + for j,rm in enumerate(rects_merged): + if overlap(r,rm): + rects_merged[j] = merge_rects(r,rm) #expand + found = True + if not found: + rects_merged.append(r) + # convert back to [x1,y1,x2,y2] format + rects_merged = [(r.left,r.top,r.right,r.bottom) for r in rects_merged] + # contract + rects_contracted = [] + for x1,y1,x2,y2 in rects_merged: 
+ x1 = min(bx,x1+expand) + y1 = min(by,y1+expand) + x2 = max(0,x2-expand) + y2 = max(0,y2-expand) + rects_contracted.append((x1,y1,x2,y2)) + + return rects_contracted + + +############################################ +# Image display +############################################ + + +def montage(frames,ncols=4,nrows=None,width=None): + """Convert list of frames into a grid montage + param: frames: list of frames as Numpy.ndarray + param: ncols: number of columns + param: width: resize images to this width before adding to grid + returns: Numpy.ndarray grid of all images + """ + + # expand image size if not enough frames + if nrows is not None and len(frames) < ncols * nrows: + blank = np.zeros_like(frames[0]) + n = ncols * nrows - len(frames) + for i in range(n): frames.append(blank) + + rows = [] + for i,im in enumerate(frames): + if width is not None: + im = imutils.resize(im,width=width) + h,w = im.shape[:2] + if i % ncols == 0: + if i > 0: + rows.append(ims) + ims = [] + ims.append(im) + if len(ims) > 0: + for j in range(ncols-len(ims)): + ims.append(np.zeros_like(im)) + rows.append(ims) + row_ims = [] + for row in rows: + row_im = np.hstack(np.array(row)) + row_ims.append(row_im) + contact_sheet = np.vstack(np.array(row_ims)) + return contact_sheet + + +def make_np_im(wh, color=(0,0,0)): + '''Creates Numpy image + :param wh: (int, int) width height + :param color: (int, int, int) in RGB + ''' + w,h = wh + im = np.ones([h,w,3], dtype=np.uint8) + im[:] = color[::-1] + return im \ No newline at end of file diff --git a/cli/app/utils/logger_utils.py b/cli/app/utils/logger_utils.py new file mode 100644 index 0000000..f7c9eec --- /dev/null +++ b/cli/app/utils/logger_utils.py @@ -0,0 +1,68 @@ +""" +Logger instantiator for use with Click utlity scripts +""" +import sys +import os +import logging + +import colorlog + +from app.settings import app_cfg as cfg + + +class Logger: + + logger_name = 'MEGAPIXELS' + + def __init__(self): + pass + + @staticmethod + def create(verbosity=4, logfile=None): + """Configures a logger from click params + :param verbosity: (int) between 0 and 5 + :param logfile: (str) path to logfile + :returns: logging root object + """ + + loglevel = (5 - (max(0, min(verbosity, 5)))) * 10 # where logging.DEBUG = 10 + date_format = '%Y-%m-%d %H:%M:%S' + if 'colorlog' in sys.modules and os.isatty(2): + cformat = '%(log_color)s' + cfg.LOGFILE_FORMAT + f = colorlog.ColoredFormatter(cformat, date_format, + log_colors = { 'DEBUG' : 'yellow', 'INFO' : 'white', + 'WARNING' : 'bold_yellow', 'ERROR': 'bold_red', + 'CRITICAL': 'bold_red' }) + else: + f = logging.Formatter(cfg.LOGFILE_FORMAT, date_format) + + # logger = logging.getLogger(Logger.logger_name) + logger = logging.getLogger(cfg.LOGGER_NAME) + logger.setLevel(loglevel) + + if logfile: + # create file handler which logs even debug messages + fh = logging.FileHandler(logfile) + fh.setLevel(loglevel) + logger.addHandler(fh) + + # add colored handler + ch = logging.StreamHandler() + ch.setFormatter(f) + logger.addHandler(ch) + + if verbosity == 0: + logger.disabled = True + + # test + # logger.debug('Hello Debug') + # logger.info('Hello Info') + # logger.warn('Hello Warn') + # logger.error('Hello Error') + # logger.critical('Hello Critical') + + return logger + + @staticmethod + def getLogger(): + return logging.getLogger(cfg.LOGGER_NAME) \ No newline at end of file diff --git a/cli/app/utils/plot_utils.py b/cli/app/utils/plot_utils.py new file mode 100644 index 0000000..5bbb8ac --- /dev/null +++ b/cli/app/utils/plot_utils.py @@ 
-0,0 +1,149 @@ +import sys +from os.path import join +import time +import random +from pathlib import Path + +import numpy as np + +import matplotlib.pyplot as plt +import matplotlib.animation +from mpl_toolkits.mplot3d import Axes3D +from matplotlib import cbook +from matplotlib import cm +from matplotlib import animation + + + +# --------------------------------------------------------------------------- +# +# Matplotlib drawing functions +# +# --------------------------------------------------------------------------- + +# Generate random hex colors +def rhex(): + r = lambda: random.randint(0,255) + return '#%02X%02X%02X' % (r(), r(), r()) + + # line weight +def generate_3d_landmark_anim(lm, fp_out, num_frames=30, fps=12, dpi=72, size=(480,480), + stroke_weight=2, mark_size=10, mark_type='.', bg_clr=(0,0,0), transparent=False): + '''Generates animated 3D plot of face landmarks + ''' + + # convert opencv BGR numpy image to RGB + bg_clr_hex = '#%02x%02x%02x' % bg_clr + #mark_clr = '#%02x%02x%02x' % mark_clr + + # center x,y,z + xmm = (np.min(lm[:,0]),np.max(lm[:,0])) + ymm = (np.min(lm[:,1]),np.max(lm[:,1])) + zmm = (np.min(lm[:,2]),np.max(lm[:,2])) + + # make copy of landmarks + lm_orig = lm.copy() + xmm = (np.min(lm_orig[:,0]),np.max(lm_orig[:,0])) + ymm = (np.min(lm_orig[:,1]),np.max(lm_orig[:,1])) + zmm = (np.min(lm_orig[:,2]),np.max(lm_orig[:,2])) + + # swap the y and z components to improve 3d rotation angles for matplotlib + lm = np.zeros_like(lm_orig).astype(np.uint8) + for i,p in enumerate(lm_orig): + x,y,z = p + lm[i] = np.array([x - xmm[0], z - zmm[0], y - ymm[0]]) + + # Create plot + figsize = (size[0]/dpi, size[1]/dpi ) + fig = plt.figure(figsize=figsize, dpi=dpi) # frameon=False + fig.tight_layout() + # remove whitespace in matplotlib + fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=None, hspace=None) + ax = fig.add_subplot(111, projection='3d') + ax.set_facecolor(bg_clr_hex) # background color + + xscale, yscale, zscale = (1.2, 1.0, 1.0) + + # scatter plot the dots + + # jaw line + mark_clr = '#%02x%02x%02x' % (0,255,0) # green + ax.plot3D(lm[:17,0]*1.2,lm[:17,1], lm[:17,2], + marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # stage-right eyebrow + mark_clr = '#%02x%02x%02x' % (255,0,0) # green + ax.plot3D(lm[17:22,0]*1.2,lm[17:22,1],lm[17:22,2], + marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # stage-left eyebrow + mark_clr = '#%02x%02x%02x' % (255,255,0) # yellow + ax.plot3D(lm[22:27,0]*1.2,lm[22:27,1],lm[22:27,2], + marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # nose ridge + mark_clr = '#%02x%02x%02x' % (0,0,255) # blue + ax.plot3D(lm[27:31,0]*1.2,lm[27:31,1],lm[27:31,2], + marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # nose-bottom + mark_clr = '#%02x%02x%02x' % (255,0,255) # magenta + ax.plot3D(lm[31:36,0]*1.2,lm[31:36,1],lm[31:36,2], + marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # stage-left eye + mark_clr = '#%02x%02x%02x' % (0,255,255) # cyan + px, py, pz = lm[36:42,0]*1.2,lm[36:42,1],lm[36:42,2] + px = np.append(px, lm[36,0]*1.2) + py = np.append(py, lm[36,1]) + pz = np.append(pz, lm[36,2]) + ax.plot3D(px, py, pz, marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # stage-right eye + mark_clr = '#%02x%02x%02x' % (255,255,255) # white + px, py, pz = lm[42:48,0]*1.2,lm[42:48,1],lm[42:48,2] + px = np.append(px, lm[42,0]*1.2) + py = 
np.append(py, lm[42,1]) + pz = np.append(pz, lm[42,2]) + ax.plot3D(px, py, pz, marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight) + + # mouth + mark_clr = '#%02x%02x%02x' % (255,125,0) # orange? + px, py, pz = lm[48:,0]*1.2,lm[48:,1],lm[48:,2] + px = np.append(px, lm[48,0]*1.2) + py = np.append(py, lm[48,1]) + pz = np.append(pz, lm[48,2]) + ax.plot3D(px, py, pz, marker=mark_type, markersize=mark_size, color=mark_clr, linewidth=stroke_weight) + + #rh = '#00ff00' # edge color + #ax.scatter(lm[:,0]*xscale,lm[:,1]*yscale,lm[:,2]*zscale, c=rh, alpha=1.0, s=35, edgecolor=rh) + #ax.scatter(lm[:,0]*xscale,lm[:,1]*yscale,lm[:,2]*zscale, c=rh, alpha=1.0, s=1) + + # center center x,y,z points + cx = ((xmm[0] - xmm[1]) // 2) + xmm[1] + cy = ((ymm[1] - ymm[0]) // 2) + ymm[0] + cz = ((zmm[1] - zmm[0]) // 2) + zmm[0] + + # remove ticks + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_zticks([]) + + # remove axis + ax.set_frame_on(False) + ax.set_axis_off() + + # set initial plot view + ax.view_init(elev=120., azim=70.) + + # rotation increments: from 0 to 360 in num_frames + phi = np.linspace(0, 2*np.pi, num_frames) + + # animation instruction + def update(phi): + ax.view_init(180,phi*180./np.pi) + + ani = matplotlib.animation.FuncAnimation(fig, update, frames=phi) + + savefig_kwargs = {'pad_inches': 0, 'transparent': transparent} + ani.save(fp_out, writer='imagemagick', fps=fps, savefig_kwargs=savefig_kwargs) \ No newline at end of file diff --git a/cli/app/utils/sheet_utils.py b/cli/app/utils/sheet_utils.py new file mode 100644 index 0000000..85f979c --- /dev/null +++ b/cli/app/utils/sheet_utils.py @@ -0,0 +1,82 @@ +import os +import gspread +from oauth2client.service_account import ServiceAccountCredentials + +from app.settings import app_cfg as cfg + +def fetch_spreadsheet(): + """Open the Google Spreadsheet, which contains the individual worksheets""" + scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'] + path = os.path.dirname(os.path.abspath(__file__)) + credentials = ServiceAccountCredentials.from_json_keyfile_name(cfg.GOOGLE_ACCOUNT_CREDS_PATH, scope) + docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc" + client = gspread.authorize(credentials) + spreadsheet = client.open_by_key(docid) + return spreadsheet + +def fetch_worksheet(name): + """Get a reference to a particular "worksheet" from the Google Spreadsheet""" + spreadsheet = fetch_spreadsheet() + return spreadsheet.worksheet(name) + +def fetch_google_sheet(name): + """Get all the values from a particular worksheet as a list of lists. + Returns: + :keys - the first row of the document + :lines - a list of lists with the rest of the rows""" + rows = fetch_worksheet(name).get_all_values() + keys = rows[0] + lines = rows[1:] + return keys, lines + +def fetch_google_sheet_objects(name): + """Get all the values from a worksheet as a list of dictionaries""" + keys, rows = fetch_google_sheet(name) + recs = [] + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + recs.append(rec) + return recs + +def fetch_google_lookup(name, item_key='key'): + """Get all the values from a worksheet as a dictionary of dictionaries. 
+ Specify which field you want to use as the dictionary key.""" + keys, rows = fetch_google_sheet(name) + lookup = {} + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + lookup[rec[item_key]] = rec + return lookup + +def fetch_verified_paper_lookup(): + """Fetch a lookup keyed by dataset, where each dataset points to a hash of valid or invalid papers...""" + keys, rows = fetch_google_sheet('verifications') + verified_lookup = {} + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + if rec['dataset'] not in verified_lookup: + verified_lookup[rec['dataset']] = {} + if str(rec['uses_dataset']) == '1': + verified_lookup[rec['dataset']][rec['paper_id']] = rec + return verified_lookup + +def update_or_append_worksheet(name, form): + """Update a row if it exists, create it if it doesn't""" + worksheet = fetch_worksheet(name) + keys = worksheet.row_values(1) + row = [ form[key] if key in form else '' for key in keys ] + try: + cell = worksheet.find(form['paper_id']) + except: + cell = None + if cell: + for i, item in enumerate(row): + worksheet.update_cell(cell.row, i+1, item) + else: + worksheet.append_row(row) -- cgit v1.2.3-70-g09d2