Diffstat (limited to 'cli/app')
-rw-r--r--  cli/app/utils/api_utils.py       170
-rw-r--r--  cli/app/utils/click_factory.py   145
-rw-r--r--  cli/app/utils/click_utils.py      62
-rw-r--r--  cli/app/utils/color_utils.py       5
-rw-r--r--  cli/app/utils/display_utils.py    28
-rw-r--r--  cli/app/utils/draw_utils.py      215
-rw-r--r--  cli/app/utils/file_utils.py      461
-rw-r--r--  cli/app/utils/identity_utils.py  161
-rw-r--r--  cli/app/utils/im_utils.py        556
-rw-r--r--  cli/app/utils/logger_utils.py     68
-rw-r--r--  cli/app/utils/plot_utils.py      149
-rw-r--r--  cli/app/utils/sheet_utils.py      82
12 files changed, 2102 insertions(+), 0 deletions(-)
diff --git a/cli/app/utils/api_utils.py b/cli/app/utils/api_utils.py
new file mode 100644
index 0000000..a4dad50
--- /dev/null
+++ b/cli/app/utils/api_utils.py
@@ -0,0 +1,170 @@
+import json
+import urllib
+import urllib.request
+
+from app.settings import app_cfg
+from app.utils import logger_utils
+
+
+class WikipediaAPI:
+
+ url_base = 'https://en.wikipedia.org/w/api.php'
+ log = logger_utils.Logger.getLogger()
+ # https://en.wikipedia.org/w/api.php?redirects=&
+ # ppprop=displaytitle&prop=pageprops|pageimages|description&generator=prefixsearch
+ # &action=query&format=json&piprop=thumbnail&pithumbsize=160&pilimit=6&gpssearch=Vicente+Fox&gpsnamespace=0&gpslimit=6
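+  # Usage sketch: WikipediaAPI().search('Vicente Fox') returns a dict with
+  # 'wp_name', 'wp_page_id' and 'wp_description' (empty strings if no page is found)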
+
+ def _url_builder(self, q):
+
+ # https://www.mediawiki.org/wiki/API%3aProperties#Info%3a_Parameters
+ params = {
+ 'redirects': '',
+ 'ppprop': 'displaytitle',
+ 'prop': 'pageprops|pageimages|description',
+ 'generator': 'prefixsearch',
+ 'action': 'query',
+ 'format': 'json',
+ 'piprop': 'thumbnail',
+ #'pithumbsize': 160,
+ 'pilimit': 1,
+ 'gpssearch': q,
+ 'gpsnamespace': 0,
+ 'gpslimit': 1
+ }
+ url = f'{self.url_base}?{urllib.parse.urlencode(params)}'
+ return url
+
+ def _api_search(self, url):
+ # set empty object
+ obj = {
+ 'wp_description': '',
+ 'wp_page_id': '',
+ 'wp_name': ''
+ }
+ try:
+ json_response = urllib.request.urlopen(url).read()
+ response = json.loads(json_response)
+ obj['wp_accessed'] = True
+ query = response.get('query', None)
+ if query:
+ pages = query.get('pages',[])
+ if pages:
+ page_id= list(pages.keys())[0]
+ if int(page_id) != -1:
+ page = pages[page_id]
+ # populate with successful result
+ obj['wp_name'] = page['title']
+ obj['wp_page_id'] = page_id
+ obj['wp_description'] = page.get('description', '') # not always available
+ # if fail, return None
+ except Exception as e:
+ obj['wp_error'] = e
+ obj['wp_accessed'] = False
+ return obj
+
+ def get_meta(self, query_obj, verbose=False):
+    '''Searches Wikipedia API for query_obj['query']; skips the request if wp_accessed is already set'''
+
+ if query_obj.get('wp_accessed', False):
+ return query_obj
+ else:
+ url = self._url_builder(query_obj['query'])
+ if verbose:
+ self.log.debug(f'querying: {url}')
+ print(url)
+ return self._api_search(url)
+
+ def search(self, q):
+ '''Searches Wikipedia API for query string'''
+ url = self._url_builder(q)
+ return self._api_search(url)
+
+
+class GoogleKnowledgeGraph:
+
+ url_kg_api = 'https://kgsearch.googleapis.com/v1/entities:search'
+ log = logger_utils.Logger.getLogger()
+ fp_api_key = app_cfg.FP_KNOWLEDGE_GRAPH_ENV
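+  # Usage sketch: GoogleKnowledgeGraph().get_kg_from_name({'query': 'Vicente Fox'}) returns
+  # the same dict populated with kg_name, kg_id, kg_description, kg_score, kg_bio, etc.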
+
+ def __init__(self, api_key=None):
+ if api_key is not None:
+ self._api_key = api_key
+ else:
+ self._api_key = open(self.fp_api_key).read()
+
+
+ def _get_kg_meta(self, result_obj, params):
+
+ params['indent'] = True # JSON indent
+ params['key'] = self._api_key
+ params['limit'] = 1
+ '''
+ Restricts returned entities to those of the specified types.
+ For example, you can specify `Person` (as defined in http://schema.org/Person)
+ to restrict the results to entities representing people.
+ If multiple types are specified, returned entities will contain one or more of these type'''
+ params['types'] = 'Person'
+
+ '''Enables prefix (initial substring) match against names and
+ aliases of entities. For example, a prefix `Jung` will match entities
+ and aliases such as `Jung`, `Jungle`, and `Jung-ho Kang`.'''
+ params['prefix'] = False
+
+ url = f'{self.url_kg_api}?{urllib.parse.urlencode(params)}'
+ try:
+ json_response = urllib.request.urlopen(url).read()
+ except Exception as e:
+ result_obj['kg_error'] = str(e)
+ result_obj['kg_accessed'] = False
+ else:
+ response = json.loads(json_response)
+ items = response.get('itemListElement', [])
+ if items:
+ item = items[0]
+ item_result = item.get('result', [])
+ result_obj['kg_url'] = item.get('url', '')
+ result_obj['kg_description'] = item_result.get('description', '')
+ result_obj['kg_id'] = item_result.get('@id', '').replace('kg:','')
+ result_obj['kg_name'] = item_result.get('name', '')
+ result_obj['kg_score'] = item.get('resultScore', 0.0)
+ det_desc = item_result.get('detailedDescription', '')
+ if det_desc:
+ result_obj['kg_bio'] = det_desc.get('articleBody','')
+ result_obj['kg_bio_url'] = det_desc.get('url','')
+ else:
+ result_obj['kg_bio'] = ''
+ result_obj['kg_bio_url'] = ''
+ result_img = item_result.get('image', '')
+ if result_img:
+ result_obj['kg_image_url'] = result_img.get('contentUrl', '')
+ else:
+ result_obj['kg_image_url'] = ''
+ result_obj['kg_error'] = ''
+ else:
+ # search was valid but no results
+ result_obj['kg_url'] = ''
+ result_obj['kg_description'] = ''
+ result_obj['kg_id'] = ''
+ result_obj['kg_name'] = ''
+ result_obj['kg_score'] = 0
+ result_obj['kg_bio'] = ''
+ result_obj['kg_bio_url'] = ''
+ result_obj['kg_image_url'] = ''
+
+ result_obj['kg_accessed'] = True
+
+ return result_obj
+
+
+ def get_kg_from_name(self, obj):
+ if obj.get('kg_accessed', False):
+ return obj
+ params = {'query': obj['query']}
+ return self._get_kg_meta(obj, params)
+
+
+ def get_kg_from_kg_id(self, obj):
+ if obj.get('kg_accessed', False):
+ return obj
+    params = {'ids': obj['kg_id']}
+ return self._get_kg_meta(obj, params)
diff --git a/cli/app/utils/click_factory.py b/cli/app/utils/click_factory.py
new file mode 100644
index 0000000..61a3b5e
--- /dev/null
+++ b/cli/app/utils/click_factory.py
@@ -0,0 +1,145 @@
+"""
+Click processor factory
+- Inspired by and uses code from @wiretapped's HTSLAM codebase
+"""
+
+import os
+import sys
+from os.path import join
+from pathlib import Path
+from functools import update_wrapper, wraps
+import itertools
+from glob import glob
+import importlib
+import logging
+
+import click
+from app.settings import app_cfg as cfg
+
+
+# --------------------------------------------------------
+# Click Group Class
+# --------------------------------------------------------
+
+# set global variable during parent class create
+dir_plugins = None # set in create
+
+class ClickComplex:
+ """Wrapper generator for custom Click CLI's based on LR's coroutine"""
+
+ def __init__(self):
+ pass
+
+
+ class CustomGroup(click.Group):
+ #global dir_plugins # from CliGenerator init
+
+ # lists commands in plugin directory
+ def list_commands(self, ctx):
+ global dir_plugins # from CliGenerator init
+ rv = list(self.commands.keys())
+ fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \
+ if str(x).endswith('.py') \
+ and '__init__' not in str(x)]
+ for fp_cmd in fp_cmds:
+ try:
+          assert fp_cmd.stem not in rv, "[-] Error: {} can't exist in both cli.py and {}".format(fp_cmd.stem, dir_plugins)
+ except Exception as ex:
+ logging.getLogger('app').error('{}'.format(ex))
+ rv.append(fp_cmd.stem)
+ rv.sort()
+ return rv
+
+ # Complex version: gets commands in directory and in this file
+ # Based on code from @wiretapped + HTSLAM
+ def get_command(self, ctx, cmd_name):
+ global dir_plugins
+ if cmd_name in self.commands:
+ return self.commands[cmd_name]
+ ns = {}
+ fpp_cmd = Path(dir_plugins, cmd_name + '.py')
+ fp_cmd = fpp_cmd.as_posix()
+ if not fpp_cmd.exists():
+ sys.exit('[-] {} file does not exist'.format(fpp_cmd))
+ code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec')
+ try:
+ eval(code, ns, ns)
+ except Exception as ex:
+ logging.getLogger('vframe').error('exception: {}'.format(ex))
+ @click.command()
+ def _fail():
+ raise Exception('while loading {}'.format(fpp_cmd.name))
+ _fail.short_help = repr(ex)
+ _fail.help = repr(ex)
+ return _fail
+ if 'cli' not in ns:
+ sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd))
+ return ns['cli']
+
+ @classmethod
+  def create(cls, dir_plugins_local):
+    global dir_plugins
+    dir_plugins = dir_plugins_local
+    return cls.CustomGroup
+
+
+
+class ClickSimple:
+ """Wrapper generator for custom Click CLI's"""
+
+ def __init__(self):
+ pass
+
+
+ class CustomGroup(click.Group):
+ #global dir_plugins # from CliGenerator init
+
+ # lists commands in plugin directory
+ def list_commands(self, ctx):
+ global dir_plugins # from CliGenerator init
+ rv = list(self.commands.keys())
+ fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \
+ if str(x).endswith('.py') \
+ and '__init__' not in str(x)]
+ for fp_cmd in fp_cmds:
+        assert fp_cmd.stem not in rv, "[-] Error: {} can't exist in both cli.py and {}".format(fp_cmd.stem, dir_plugins)
+ rv.append(fp_cmd.stem)
+ rv.sort()
+ return rv
+
+ # Complex version: gets commands in directory and in this file
+ # from HTSLAM
+ def get_command(self, ctx, cmd_name):
+ global dir_plugins # from CliGenerator init
+ if cmd_name in self.commands:
+ return self.commands[cmd_name]
+ ns = {}
+ fpp_cmd = Path(dir_plugins, cmd_name + '.py')
+ fp_cmd = fpp_cmd.as_posix()
+ if not fpp_cmd.exists():
+ sys.exit('[-] {} file does not exist'.format(fpp_cmd))
+ code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec')
+ try:
+ eval(code, ns, ns)
+ except Exception as ex:
+ logging.getLogger('vframe').error('exception: {}'.format(ex))
+ @click.command()
+ def _fail():
+ raise Exception('while loading {}'.format(fpp_cmd.name))
+ _fail.short_help = repr(ex)
+ _fail.help = repr(ex)
+ return _fail
+ if 'cli' not in ns:
+ sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd))
+ return ns['cli']
+
+ @classmethod
+  def create(cls, dir_plugins_local):
+    global dir_plugins
+    dir_plugins = dir_plugins_local
+    return cls.CustomGroup
diff --git a/cli/app/utils/click_utils.py b/cli/app/utils/click_utils.py
new file mode 100644
index 0000000..dc00f58
--- /dev/null
+++ b/cli/app/utils/click_utils.py
@@ -0,0 +1,62 @@
+"""
+Custom Click parameter types
+"""
+import click
+
+from app.settings import app_cfg as cfg
+from app.settings import types
+
+
+# --------------------------------------------------------
+# Click command helpers
+# --------------------------------------------------------
+def enum_to_names(enum_type):
+ return {x.name.lower(): x for x in enum_type}
+
+def show_help(enum_type):
+ names = enum_to_names(enum_type)
+ return 'Options: "{}"'.format(', '.join(list(names.keys())))
+
+def get_default(opt):
+ return opt.name.lower()
+
+
+# --------------------------------------------------------
+# Custom Click parameter class
+# --------------------------------------------------------
+
+
+class ParamVar(click.ParamType):
+
+ name = 'default_type'
+
+ def __init__(self, param_type):
+ # self.name = '{}'.format(param_type.name.lower())
+ self.ops = {x.name.lower(): x for x in param_type}
+
+ def convert(self, value, param, ctx):
+ """converts (str) repr to Enum hash"""
+ try:
+ return self.ops[value.lower()]
+ except:
+      self.fail('{} is not a valid option'.format(value), param, ctx)
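+
+# Usage sketch (assumes an Enum defined in app.settings.types):
+#   @click.option('--dataset', 'opt_dataset', type=ParamVar(types.Dataset),
+#     default=get_default(types.Dataset.LFW), help=show_help(types.Dataset))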
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/cli/app/utils/color_utils.py b/cli/app/utils/color_utils.py
new file mode 100644
index 0000000..e4d33ef
--- /dev/null
+++ b/cli/app/utils/color_utils.py
@@ -0,0 +1,5 @@
+import colorsys
+
+
+def hsv2rgb_int(h,s,v):
+ return tuple(round(i * 255) for i in colorsys.hsv_to_rgb(h,s,v))
\ No newline at end of file
diff --git a/cli/app/utils/display_utils.py b/cli/app/utils/display_utils.py
new file mode 100644
index 0000000..8e265ae
--- /dev/null
+++ b/cli/app/utils/display_utils.py
@@ -0,0 +1,28 @@
+import sys
+
+import cv2 as cv
+
+from app.utils.logger_utils import Logger
+
+
+log = Logger.getLogger()
+
+def handle_keyboard(delay_amt=1):
+ '''Used with cv.imshow('title', image) to wait for keyboard press
+ '''
+ while True:
+ k = cv.waitKey(delay_amt) & 0xFF
+ if k == 27 or k == ord('q'): # ESC
+ cv.destroyAllWindows()
+ sys.exit()
+ elif k == 32 or k == 83: # 83 = right arrow
+ break
+ elif k != 255:
+ log.debug(f'k: {k}')
+
+def handle_keyboard_video(delay_amt=1):
+  key = cv.waitKey(delay_amt) & 0xFF
+ # if the `q` key was pressed, break from the loop
+ if key == ord("q"):
+ cv.destroyAllWindows()
+ sys.exit()
diff --git a/cli/app/utils/draw_utils.py b/cli/app/utils/draw_utils.py
new file mode 100644
index 0000000..18c03f2
--- /dev/null
+++ b/cli/app/utils/draw_utils.py
@@ -0,0 +1,215 @@
+import sys
+from math import sqrt
+
+import numpy as np
+import cv2 as cv
+import PIL
+from PIL import ImageDraw
+
+from app.utils import im_utils
+from app.settings import app_cfg
+
+log = app_cfg.LOG
+
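+# 0-based indices of the last point in each 68-point landmark group (jaw, brows,
+# nose ridge, nose base, eyes, mouth); used to avoid drawing lines across groups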
+end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1
+
+# ---------------------------------------------------------------------------
+#
+# 3D landmark drawing utilities
+#
+# ---------------------------------------------------------------------------
+
+def plot_keypoints(im, kpts):
+ '''Draw 68 key points
+ :param im: the input im
+ :param kpts: (68, 3). flattened list
+ '''
+ im = im.copy()
+ kpts = np.round(kpts).astype(np.int32)
+ for i in range(kpts.shape[0]):
+ st = kpts[i, :2]
+ im = cv.circle(im, (st[0], st[1]), 1, (0, 0, 255), 2)
+ if i in end_list:
+ continue
+ ed = kpts[i + 1, :2]
+ im = cv.line(im, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1)
+ return im
+
+
+def calc_hypotenuse(pts):
+ bbox = [min(pts[0, :]), min(pts[1, :]), max(pts[0, :]), max(pts[1, :])]
+ center = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
+ radius = max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 2
+ bbox = [center[0] - radius, center[1] - radius, center[0] + radius, center[1] + radius]
+ llength = sqrt((bbox[2] - bbox[0]) ** 2 + (bbox[3] - bbox[1]) ** 2)
+ return llength / 3
+
+def build_camera_box(rear_size=90):
+ point_3d = []
+ rear_depth = 0
+ point_3d.append((-rear_size, -rear_size, rear_depth))
+ point_3d.append((-rear_size, rear_size, rear_depth))
+ point_3d.append((rear_size, rear_size, rear_depth))
+ point_3d.append((rear_size, -rear_size, rear_depth))
+ point_3d.append((-rear_size, -rear_size, rear_depth))
+
+ front_size = int(4 / 3 * rear_size)
+ front_depth = int(4 / 3 * rear_size)
+ point_3d.append((-front_size, -front_size, front_depth))
+ point_3d.append((-front_size, front_size, front_depth))
+ point_3d.append((front_size, front_size, front_depth))
+ point_3d.append((front_size, -front_size, front_depth))
+ point_3d.append((-front_size, -front_size, front_depth))
+  point_3d = np.array(point_3d, dtype=np.float64).reshape(-1, 3)
+
+ return point_3d
+
+
+def plot_pose_box(im, Ps, pts68s, color=(40, 255, 0), line_width=2):
+ '''Draw a 3D box as annotation of pose.
+ ref: https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py
+  :param im: the input image
+  :param Ps: (3, 4) affine camera matrix, or a list of them
+  :param pts68s: (2, 68) or (3, 68) keypoints, or a list of them
+ '''
+ im_draw = im.copy()
+ if not isinstance(pts68s, list):
+ pts68s = [pts68s]
+
+ if not isinstance(Ps, list):
+ Ps = [Ps]
+
+ for i in range(len(pts68s)):
+ pts68 = pts68s[i]
+ llength = calc_hypotenuse(pts68)
+ point_3d = build_camera_box(llength)
+ P = Ps[i]
+
+ # Map to 2d im points
+ point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) # n x 4
+ point_2d = point_3d_homo.dot(P.T)[:, :2]
+
+ point_2d[:, 1] = - point_2d[:, 1]
+ point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(pts68[:2, :27], 1)
+ point_2d = np.int32(point_2d.reshape(-1, 2))
+
+ # Draw all the lines
+ cv.polylines(im_draw, [point_2d], True, color, line_width, cv.LINE_AA)
+ cv.line(im_draw, tuple(point_2d[1]), tuple(point_2d[6]), color, line_width, cv.LINE_AA)
+ cv.line(im_draw, tuple(point_2d[2]), tuple(point_2d[7]), color, line_width, cv.LINE_AA)
+ cv.line(im_draw, tuple(point_2d[3]), tuple(point_2d[8]), color, line_width, cv.LINE_AA)
+
+ return im_draw
+
+
+
+# ---------------------------------------------------------------------------
+#
+# OpenCV drawing functions
+#
+# ---------------------------------------------------------------------------
+
+pose_types = {'pitch': (0,0,255), 'roll': (255,0,0), 'yaw': (0,255,0)}
+
+def draw_landmarks2d_cv(im, points_norm, radius=3, color=(0,255,0)):
+ '''Draws facial landmarks, either 5pt or 68pt
+ '''
+ im_dst = im.copy()
+ dim = im.shape[:2][::-1]
+ for x,y in points_norm:
+ pt = (int(x*dim[0]), int(y*dim[1]))
+ cv.circle(im_dst, pt, radius, color, -1, cv.LINE_AA)
+ return im_dst
+
+def draw_landmarks2d_pil(im_pil, points_norm, radius=3, color=(0,255,0)):
+ '''Draws facial landmarks, either 5pt or 68pt
+ '''
+ assert im_utils.is_pil(im_pil)
+ draw = ImageDraw.Draw(im_pil)
+  dim = im_pil.size
+ for x,y in points_norm:
+ x1, y1 = (int(x*dim[0]), int(y*dim[1]))
+ xyxy = (x1, y1, x1+radius, y1+radius)
+ draw.ellipse(xyxy, fill='white')
+ del draw
+ im_dst = im_utils.ensure_np(im_pil)
+ im_dst = im_utils.rgb2bgr(im_dst)
+ return im_dst
+
+
+def draw_landmarks3D_cv(im, points, radius=3, color=(0,255,0)):
+ '''Draws 3D facial landmarks
+ '''
+ im_dst = im.copy()
+ for x,y,z in points:
+    cv.circle(im_dst, (int(x), int(y)), radius, color, -1, cv.LINE_AA)
+ return im_dst
+
+
+def draw_bbox_cv(im_np, bbox_norm, color=(0,255,0), stroke_weight=2):
+ '''Draws BBox onto cv image
+ '''
+ bbox_dim = bbox_norm.to_bbox_dim(im_np.shape[:2][::-1])
+ return cv.rectangle(im_np, bbox_dim.p1.xy, bbox_dim.p2.xy, color, stroke_weight, cv.LINE_AA)
+
+
+def draw_bbox_pil(im, bboxes_norm, color=(0,255,0), stroke_weight=2):
+ '''Draws BBox onto cv image
+ :param color: RGB value
+ '''
+ if im_utils.is_np(im):
+ im = im_utils.np2pil(im)
+ was_np = True
+ else:
+ was_np = False
+
+ if not type(bboxes_norm) == list:
+ bboxes_norm = [bboxes_norm]
+
+
+ im_draw = ImageDraw.ImageDraw(im)
+
+ for bbox_norm in bboxes_norm:
+ bbox_dim = bbox_norm.to_bbox_dim(im.size)
+ xyxy = (bbox_dim.p1.xy, bbox_dim.p2.xy)
+ im_draw.rectangle(xyxy, outline=color, width=stroke_weight)
+ del im_draw
+
+ if was_np:
+ im = im_utils.pil2np(im)
+ return im
+
+
+def draw_pose(im, pt_nose, image_pts):
+ '''Draws 3-axis pose over image
+ TODO: normalize point data
+ '''
+ im_dst = im.copy()
+ log.debug(f'pt_nose: {pt_nose}')
+ log.debug(f'image_pts pitch: {image_pts["pitch"]}')
+ cv.line(im_dst, pt_nose, tuple(image_pts['pitch']), pose_types['pitch'], 3)
+ cv.line(im_dst, pt_nose, tuple(image_pts['yaw']), pose_types['yaw'], 3)
+ cv.line(im_dst, pt_nose, tuple(image_pts['roll']), pose_types['roll'], 3)
+ return im_dst
+
+def draw_text_cv(im, pt_norm, text, size=1.0, color=(0,255,0)):
+ '''Draws degrees as text over image
+ '''
+ im_dst = im.copy()
+ dim = im.shape[:2][::-1]
+ pt = tuple(map(int, (pt_norm[0]*dim[0], pt_norm[1]*dim[1])))
+ cv.putText(im_dst, text, pt, cv.FONT_HERSHEY_SIMPLEX, size, color, thickness=1, lineType=cv.LINE_AA)
+ return im_dst
+
+
+def draw_degrees(im, pose_data, color=(0,255,0)):
+ '''Draws degrees as text over image
+ '''
+ im_dst = im.copy()
+ for i, pose_type in enumerate(pose_types.items()):
+ k, clr = pose_type
+ v = pose_data[k]
+ t = '{}: {:.2f}'.format(k, v)
+ origin = (10, 30 + (25 * i))
+ cv.putText(im_dst, t, origin, cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, thickness=2, lineType=2)
+  return im_dst
\ No newline at end of file
diff --git a/cli/app/utils/file_utils.py b/cli/app/utils/file_utils.py
new file mode 100644
index 0000000..5feab32
--- /dev/null
+++ b/cli/app/utils/file_utils.py
@@ -0,0 +1,461 @@
+"""
+File utilities
+"""
+import sys
+import os
+from os.path import join
+import stat
+
+from glob import glob
+from pprint import pprint
+import shutil
+import distutils
+import pathlib
+from pathlib import Path
+import json
+import csv
+import pickle
+import threading
+from queue import Queue
+import time
+import logging
+import itertools
+import collections
+
+import hashlib
+import pymediainfo
+import click
+from tqdm import tqdm
+import cv2 as cv
+from PIL import Image
+import imutils
+
+from app.settings import app_cfg as cfg
+from app.settings import types
+
+log = logging.getLogger(cfg.LOGGER_NAME)
+
+
+# ------------------------------------------
+# File I/O read/write little helpers
+# ------------------------------------------
+
+def glob_multi(dir_in, exts=['jpg'], recursive=False):
+ files = []
+ for ext in exts:
+ if recursive:
+ fp_glob = join(dir_in, '**/*.{}'.format(ext))
+ log.info(f'glob {fp_glob}')
+ files += glob(fp_glob, recursive=True)
+ else:
+ fp_glob = join(dir_in, '*.{}'.format(ext))
+ files += glob(fp_glob)
+ return files
+
+
+def zpad(x, zeros=cfg.ZERO_PADDING):
+ return str(x).zfill(zeros)
+
+def get_ext(fpp, lower=True):
+ """Retuns the file extension w/o dot
+ :param fpp: (Pathlib.path) filepath
+ :param lower: (bool) force lowercase
+ :returns: (str) file extension (ie 'jpg')
+ """
+ fpp = ensure_posixpath(fpp)
+ ext = fpp.suffix.replace('.', '')
+ return ext.lower() if lower else ext
+
+
+def convert(fp_in, fp_out):
+ """Converts between JSON and Pickle formats
+ Pickle files are about 30-40% smaller filesize
+ """
+ if get_ext(fp_in) == get_ext(fp_out):
+    log.error('Input {} and output {} have the same format; nothing to convert'.format(fp_in, fp_out))
+    return
+
+ lazywrite(lazyload(fp_in), fp_out)
+
+
+def load_csv(fp_in, as_list=True):
+ """Loads CSV and retuns list of items
+ :param fp_in: string filepath to CSV
+ :returns: list of all CSV data
+ """
+ if not Path(fp_in).exists():
+    log.error('not found: {}'.format(fp_in))
+    return []
+ log.info('loading: {}'.format(fp_in))
+ with open(fp_in, 'r') as fp:
+ items = csv.DictReader(fp)
+ if as_list:
+ items = [x for x in items]
+ log.info('returning {:,} items'.format(len(items)))
+ return items
+
+def unfussy_csv_reader(reader):
+ """Loads a CSV while ignoring possible data errors
+ :param reader: Special reader for load_csv_safe which ignores CSV parse errors
+ """
+ while True:
+ try:
+ yield next(reader)
+ except StopIteration:
+ return
+    except csv.Error as e:
+      # log the problem and skip the malformed row
+      log.warning('skipping malformed CSV row: {}'.format(e))
+      continue
+
+def load_csv_safe(fp_in, keys=True, create=False):
+ """Loads a CSV while ignoring possible data errors
+ :param fp_in: string filepath to JSON file
+ :param keys: boolean set to false if the first line is not headers (for some reason)
+ :param create: boolean set to true to return an empty keys/values if the CSV does not exist
+ """
+ try:
+ with open(fp_in, 'r', newline='', encoding='utf-8') as f:
+ # reader = csv.reader( (line.replace('\0','') for line in f) )
+ reader = csv.reader(f)
+ lines = list(unfussy_csv_reader(reader))
+ if keys:
+ keys = lines[0]
+ lines = lines[1:]
+ return keys, lines
+ return lines
+ except:
+ if create:
+ if keys:
+ return {}, []
+ return []
+ raise
+
+def load_recipe(fp_in):
+ """Loads a JSON file as an object with properties accessible with dot syntax
+ :param fp_in: string filepath to JSON file
+ """
+  with open(fp_in) as fh:
+ return json.load(fh, object_hook=lambda d: collections.namedtuple('X', d.keys())(*d.values()))
+
+
+def lazywrite(data, fp_out, sort_keys=True):
+ """Writes JSON or Pickle data"""
+ ext = get_ext(fp_out)
+ if ext == 'json':
+ return write_json(data, fp_out, sort_keys=sort_keys)
+ elif ext == 'pkl':
+ return write_pickle(data, fp_out)
+ else:
+ raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext))
+
+
+def lazyload(fp_in, ordered=True):
+ """Loads JSON or Pickle serialized data"""
+ if not Path(fp_in).exists():
+ log.error('file does not exist: {}'.format(fp_in))
+ return {}
+ ext = get_ext(fp_in)
+ if ext == 'json':
+ items = load_json(fp_in)
+ elif ext == 'pkl':
+ items = load_pickle(fp_in)
+ else:
+ raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext))
+
+ if ordered:
+ return collections.OrderedDict(sorted(items.items(), key=lambda t: t[0]))
+ else:
+ return items
+
+
+def load_text(fp_in):
+ with open(fp_in, 'rt') as fp:
+ lines = fp.read().rstrip('\n').split('\n')
+ return lines
+
+def load_json(fp_in):
+ """Loads JSON and returns items
+ :param fp_in: (str) filepath
+ :returns: data from JSON
+ """
+ if not Path(fp_in).exists():
+ log.error('file does not exist: {}'.format(fp_in))
+ return {}
+ with open(str(fp_in), 'r') as fp:
+ data = json.load(fp)
+ return data
+
+
+def load_pickle(fp_in):
+ """Loads Pickle and returns items
+ :param fp_in: (str) filepath
+  :returns: data from Pickle
+ """
+ if not Path(fp_in).exists():
+ log.error('file does not exist: {}'.format(fp_in))
+ return {}
+ with open(str(fp_in), 'rb') as fp:
+ data = pickle.load(fp)
+ return data
+
+
+def order_items(records):
+ """Orders records by ASC SHA256"""
+ return collections.OrderedDict(sorted(records.items(), key=lambda t: t[0]))
+
+def write_text(data, fp_out, ensure_path=True):
+ if not data:
+ log.error('no data')
+ return
+
+ if ensure_path:
+ mkdirs(fp_out)
+ with open(fp_out, 'w') as fp:
+ if type(data) == list:
+ fp.write('\n'.join(data))
+ else:
+ fp.write(data)
+
+
+def write_pickle(data, fp_out, ensure_path=True):
+ """
+ """
+ if ensure_path:
+ mkdirs(fp_out) # mkdir
+ with open(fp_out, 'wb') as fp:
+ pickle.dump(data, fp)
+
+
+def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True, verbose=False):
+ """
+ """
+ if ensure_path:
+ mkdirs(fp_out)
+ with open(fp_out, 'w') as fp:
+ if minify:
+ json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys)
+ else:
+ json.dump(data, fp, indent=2, sort_keys=sort_keys)
+ if verbose:
+ log.info('Wrote JSON: {}'.format(fp_out))
+
+def write_csv(data, fp_out, header=None):
+ """Write a CSV of key-value pairs"""
+ with open(fp_out, 'w') as fp:
+ writer = csv.DictWriter(fp, fieldnames=header)
+ writer.writeheader()
+ if type(data) is dict:
+ for k, v in data.items():
+        fp.write('{},{}\n'.format(k, v))
+
+def write_csv_table(fn, keys, rows):
+ """Write a CSV of columns and rows"""
+ with open(fn, 'w', encoding='utf-8') as f:
+ writer = csv.writer(f)
+ if keys is not None:
+ writer.writerow(keys)
+ for row in rows:
+ writer.writerow(row)
+
+
+def write_serialized_items(items, fp_out, ensure_path=True, minify=True, sort_keys=True):
+ """Writes serialized data
+ :param items: (dict) a sha256 dict of MappingItems
+ :param serialize: (bool) serialize the data
+ :param ensure_path: ensure the parent directories exist
+ :param minify: reduces JSON file size
+ """
+ log.info('Writing serialized data...')
+ fpp_out = ensure_posixpath(fp_out)
+ serialized_items = {k: v.serialize() for k, v in tqdm(items.items()) }
+ # write data
+ ext = get_ext(fpp_out)
+ if ext == 'json':
+ write_json(serialized_items, fp_out, ensure_path=ensure_path, minify=minify, sort_keys=sort_keys)
+ elif ext == 'pkl':
+ write_pickle(serialized_items, fp_out)
+ else:
+ raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext))
+ log.info('Wrote {:,} items to {}'.format(len(items), fp_out))
+
+
+def write_modeled_data(data, fp_out, ensure_path=False):
+ """
+ """
+ fpp_out = ensure_posixpath(fp_out)
+ if ensure_path:
+ mkdirs(fpp_out)
+ ext = get_ext(fpp_out)
+ if ext == 'pkl':
+ write_pickle(data, str(fp_out))
+ else:
+ raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext))
+
+
+# ---------------------------------------------------------------------
+# Filepath utilities
+# ---------------------------------------------------------------------
+
+def ensure_posixpath(fp):
+ """Ensures filepath is pathlib.Path
+ :param fp: a (str, LazyFile, PosixPath)
+ :returns: a PosixPath filepath object
+ """
+ if type(fp) == str:
+ fpp = Path(fp)
+ elif type(fp) == click.utils.LazyFile:
+ fpp = Path(fp.name)
+ elif type(fp) == pathlib.PosixPath:
+ fpp = fp
+ else:
+ raise TypeError('{} is not a valid filepath type'.format(type(fp)))
+ return fpp
+
+
+def mkdirs(fp):
+ """Ensure parent directories exist for a filepath
+ :param fp: string, Path, or click.File
+ """
+ fpp = ensure_posixpath(fp)
+ fpp = fpp.parent if fpp.suffix else fpp
+ fpp.mkdir(parents=True, exist_ok=True)
+
+
+def ext_media_format(ext):
+ """Converts file extension into Enum MediaType
+  :param ext: (str) file extension
+ """
+ for media_format, exts in cfg.VALID_MEDIA_EXTS.items():
+ if ext in exts:
+ return media_format
+ raise ValueError('{} is not a valid option'.format(ext))
+
+
+def sha256(fp_in, block_size=65536):
+ """Generates SHA256 hash for a file
+ :param fp_in: (str) filepath
+ :param block_size: (int) byte size of block
+ :returns: (str) hash
+ """
+ sha256 = hashlib.sha256()
+ with open(fp_in, 'rb') as fp:
+ for block in iter(lambda: fp.read(block_size), b''):
+ sha256.update(block)
+ return sha256.hexdigest()
+
+
+def sha256_tree(sha256):
+ """Split hash into branches with tree-depth for faster file indexing
+ :param sha256: str of a sha256 hash
+  :returns: str of the sha256 tree with '/' delimiter
+ """
+ branch_size = cfg.HASH_BRANCH_SIZE
+ tree_size = cfg.HASH_TREE_DEPTH * branch_size
+ sha256_tree = [sha256[i:(i+branch_size)] for i in range(0, tree_size, branch_size)]
+ return '/'.join(sha256_tree)
+
+
+def migrate(fmaps, threads=1, action='copy', force=False):
+ """Copy/move/symlink files form src to dst directory
+ :param fmaps: (dict) with 'src' and 'dst' filepaths
+ :param threads: (int) number of threads
+ :param action: (str) copy/move/symlink
+ :param force: (bool) force overwrite existing files
+ """
+ num_items = len(fmaps)
+
+ def copytree(src, dst, symlinks = False, ignore = None):
+ # ozxyqk: https://stackoverflow.com/questions/22588225/how-do-you-merge-two-directories-or-move-with-replace-from-the-windows-command
+ if not os.path.exists(dst):
+ mkdirs(dst)
+ # os.makedirs(dst)
+ shutil.copystat(src, dst)
+ lst = os.listdir(src)
+ if ignore:
+ excl = ignore(src, lst)
+ lst = [x for x in lst if x not in excl]
+ for item in lst:
+ s = os.path.join(src, item)
+ d = os.path.join(dst, item)
+ if symlinks and os.path.islink(s):
+ if os.path.exists(d):
+ os.remove(d)
+ os.symlink(os.readlink(s), d)
+ try:
+ st = os.lstat(s)
+ mode = stat.S_IMODE(st.st_mode)
+ os.lchmod(d, mode)
+ except:
+ pass # lchmod not available
+ elif os.path.isdir(s):
+ copytree(s, d, symlinks, ignore)
+ else:
+ shutil.copy(s, d)
+
+ assert(action in ['copy','move','symlink'])
+
+ if threads > 1:
+ # threaded
+ task_queue = Queue()
+ print_lock = threading.Lock()
+
+ def migrate_action(fmap):
+ data_local = threading.local()
+ data_local.src, data_local.dst = (fmap['src'], fmap['dst'])
+ data_local.src_path = Path(data_local.src)
+ data_local.dst_path = Path(data_local.dst)
+
+ if force or not data_local.dst_path.exists():
+ if action == 'copy':
+ shutil.copy(data_local.src, data_local.dst)
+ #if data_local.src_path.is_dir():
+ # copytree(data_local.src, data_local.dst)
+ #else:
+ elif action == 'move':
+ shutil.move(data_local.src, data_local.dst)
+ elif action == 'symlink':
+          if force and data_local.dst_path.exists():
+            data_local.dst_path.unlink()
+          # create the symlink at dst pointing back to src
+          data_local.dst_path.symlink_to(data_local.src)
+
+ def process_queue(num_items):
+ # TODO: progress bar
+ while True:
+ fmap = task_queue.get()
+ migrate_action(fmap)
+ log.info('migrate: {:.2f} {:,}/{:,}'.format(
+ (task_queue.qsize() / num_items)*100, task_queue.qsize(), num_items))
+ task_queue.task_done()
+
+ # avoid race conditions by creating dir structure here
+ log.info('create directory structure')
+ for fmap in tqdm(fmaps):
+ mkdirs(fmap['dst'])
+
+ # init threads
+ for i in range(threads):
+ t = threading.Thread(target=process_queue, args=(num_items,))
+ t.daemon = True
+ t.start()
+
+ # process threads
+ start = time.time()
+ for fmap in fmaps:
+ task_queue.put(fmap)
+
+ task_queue.join()
+
+ else:
+ # non-threaded
+ for fmap in tqdm(fmaps):
+ mkdirs(fmap['dst'])
+ if action == 'copy':
+ shutil.copy(fmap['src'], fmap['dst'])
+ elif action == 'move':
+ shutil.move(fmap['src'], fmap['dst'])
+ elif action == 'symlink':
+        if force and Path(fmap['dst']).exists():
+          Path(fmap['dst']).unlink()
+        Path(fmap['dst']).symlink_to(fmap['src'])
+ return
+
diff --git a/cli/app/utils/identity_utils.py b/cli/app/utils/identity_utils.py
new file mode 100644
index 0000000..5855fbb
--- /dev/null
+++ b/cli/app/utils/identity_utils.py
@@ -0,0 +1,161 @@
+import os
+from pathlib import Path
+from glob import glob
+import unidecode
+import difflib
+
+from app.settings import types
+from app.models.data_store import DataStore
+from app.utils import logger_utils
+
+log = logger_utils.Logger.getLogger()
+
+az = 'abcdefghijklmnopqrstuvwxyz'
+AZ = az.upper()
+z9 = list(map(str, list(range(0,10))))
+aZ9 = list(az) + list(AZ) + z9
+
+def letter_strip(a, b=aZ9):
+ # strip every letter from a that is not in b
+ return ''.join([x for x in a if x in b])
+
+def letter_match(a, b):
+ # check if every letter (a-zA-Z0-9) exists in both
+ return sum([x in b for x in a]) == len(a)
+
+def names_match_strict(a, b):
+ clean_a = letter_strip(a)
+ clean_b = letter_strip(b)
+ return len(clean_a) == len(clean_b) and letter_match(clean_a, clean_b) and letter_match(clean_b, clean_a)
+
+
+def sanitize_name(name, as_str=False):
+ splits = [unidecode.unidecode(x.strip().lower()) for x in name.strip().split(' ')]
+ if as_str:
+ return ' '.join(splits)
+ else:
+ return splits
+
+'''
+class Dataset(Enum):
+ LFW, VGG_FACE, VGG_FACE2, MSCELEB, UCCS, UMD_FACES, SCUT_FBP, UCF_SELFIE, UTK, \
+ CASIA_WEBFACE, AFW, PUBFIG83, HELEN, PIPA, MEGAFACE, BRAINWASH, IMDB_WIKI = range(16)
+'''
+# Get list of names based on Dataset type
+def get_names(opt_dataset, opt_data_store=types.DataStore.HDD):
+ data_store = DataStore(opt_data_store, opt_dataset)
+ dir_dataset = data_store.dir_dataset # path to dataset root
+  dir_media_orig = data_store.dir_media_original
+  # only a few datasets have name listings implemented; default to empty lists
+  names_orig, names_query = [], []
+ if opt_dataset == types.Dataset.AFW:
+ # Annotated Faces in the Wild
+ pass
+ elif opt_dataset == types.Dataset.BRAINWASH:
+ # Brainwash IP Cam dataset
+ pass
+ elif opt_dataset == types.Dataset.CASIA_WEBFACE:
+ #
+ pass
+ elif opt_dataset == types.Dataset.HELEN:
+ # Helen
+ pass
+ elif opt_dataset == types.Dataset.IMDB_WIKI:
+    # IMDB-Wiki
+ pass
+ elif opt_dataset == types.Dataset.LAG:
+ # Large Age Gap
+ pass
+ elif opt_dataset == types.Dataset.LFW:
+ # Labeled Faces in The Wild
+ names_orig = [x for x in os.listdir(dir_media_orig)]
+ names_query = [x.replace('_', ' ') for x in names_orig]
+ elif opt_dataset == types.Dataset.MEGAFACE:
+ # MegaFace
+ pass
+ elif opt_dataset == types.Dataset.MSCELEB:
+ # MS Celeb
+ pass
+ elif opt_dataset == types.Dataset.PIPA:
+ # People in Photo Albums
+ pass
+ elif opt_dataset == types.Dataset.PUBFIG83:
+ # PubFig83
+    names_orig = [x for x in os.listdir(dir_media_orig) if Path(x).suffix != '.txt']
+ names_query = [x.replace('_', ' ') for x in names_orig]
+ elif opt_dataset == types.Dataset.SCUT_FBP:
+ # SCUT Facial Beauty Perception
+ pass
+ elif opt_dataset == types.Dataset.UCCS:
+    # Unconstrained College Students
+ pass
+ elif opt_dataset == types.Dataset.UMD_FACES:
+ # University of Maryland Faces
+ pass
+ elif opt_dataset == types.Dataset.UTK:
+ # University of Tennessee Knoxville
+ pass
+ elif opt_dataset == types.Dataset.UCF_SELFIE:
+ # University of Central Florida Selfie
+ pass
+ elif opt_dataset == types.Dataset.VGG_FACE:
+ # Visual Geometry Group Face 1
+ pass
+ elif opt_dataset == types.Dataset.VGG_FACE2:
+ # Visual Geometry Group Face 2
+ pass
+ else:
+    log.warning(f'{opt_dataset} not yet implemented')
+ names_orig = []
+ names_query = []
+ result = {'names_orig': names_orig, 'names_query': names_query}
+ return result
+
+
+def similarity(a, b):
+ return difflib.SequenceMatcher(a=a.lower(), b=b.lower()).ratio()
+
+def names_match(name_a, name_b, threshold=0.9, as_float=False, compound_score=False, name_a_pre=False, name_b_pre=False):
+ '''Returns boolean if names are similar enough
+ '''
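+  # Sketch: names_match('José Gómez', 'Jose Gomez') -> True, since unidecode and
+  # lowercasing make the words identical (similarity 1.0 > threshold)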
+ # strip spaces and split names into list of plain text words
+ if name_a_pre:
+ name_a_clean = name_a
+ else:
+ name_a_clean = [unidecode.unidecode(x.strip().lower()) for x in name_a.strip().split(' ')]
+ if name_b_pre:
+ name_b_clean = name_b
+ else:
+ name_b_clean = [unidecode.unidecode(x.strip().lower()) for x in name_b.strip().split(' ')]
+
+ # assign short long vars
+ len_a = len(name_a_clean)
+ len_b = len(name_b_clean)
+ len_min = min(len_a, len_b)
+ len_max = max(len_a, len_b)
+
+ # compute scores
+ scores = []
+ for i in range(len(name_a_clean)):
+ word_a = name_a_clean[i]
+ subscores = []
+ for j in range(len(name_b_clean)):
+ word_b = name_b_clean[j]
+ score = similarity(word_a, word_b)
+ subscores.append(score)
+ scores.append(subscores)
+
+ # return result
+ ratio_similar = sum(max(x) for x in scores) / len(scores)
+
+ if compound_score:
+ # combine with any missing letters/words
+ letters_a = sum(len(x) for x in name_a_clean)
+ letters_b = sum(len(x) for x in name_b_clean)
+ ratio_letters = min(letters_a, letters_b) / max(letters_a, letters_b)
+ score = (0.8 * ratio_similar) + (0.2 * ratio_letters)
+ else:
+ score = ratio_similar
+
+ if as_float:
+ return score
+ else:
+    return score > threshold
\ No newline at end of file
diff --git a/cli/app/utils/im_utils.py b/cli/app/utils/im_utils.py
new file mode 100644
index 0000000..ab6467e
--- /dev/null
+++ b/cli/app/utils/im_utils.py
@@ -0,0 +1,556 @@
+import sys
+import os
+from os.path import join
+import cv2 as cv
+import imagehash
+from PIL import Image, ImageDraw, ImageFilter, ImageOps
+from skimage.filters.rank import entropy
+from skimage.morphology import disk
+from skimage import feature
+# import matplotlib.pyplot as plt
+import imutils
+import time
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+import datetime
+
+def ensure_pil(im):
+ """Ensure image is Pillow format"""
+ try:
+ im.verify()
+ return im
+ except:
+ return Image.fromarray(im.astype('uint8'), 'RGB')
+
+def ensure_np(im):
+ """Ensure image is numpy array"""
+ if type(im) == np.ndarray:
+ return im
+ return np.asarray(im, np.uint8)
+
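+# minimal type-check helpers; draw_utils calls im_utils.is_pil / im_utils.is_np
+def is_pil(im):
+  """Returns True if im is a PIL.Image"""
+  return isinstance(im, Image.Image)
+
+def is_np(im):
+  """Returns True if im is a numpy.ndarray"""
+  return isinstance(im, np.ndarray)
+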
+def num_channels(im):
+ '''Returns number of channels in numpy.ndarray image'''
+ if len(im.shape) > 2:
+ return im.shape[2]
+ else:
+ return 1
+
+def is_grayscale(im, threshold=5):
+ """Returns True if image is grayscale
+ :param im: (numpy.array) image
+ :return (bool) of if image is grayscale"""
+ b = im[:,:,0]
+ g = im[:,:,1]
+ mean = np.mean(np.abs(g - b))
+ return mean < threshold
+
+
+def compute_features(fe,frames,phashes,phash_thresh=1):
+ """
+ Get vector embedding using FeatureExtractor
+ :param fe: FeatureExtractor class
+  :param frames: list of frame images as numpy.ndarray
+  :param phashes: list of perceptual hashes, one per frame
+  :param phash_thresh: perceptual hash threshold
+ :returns: list of feature vectors
+ """
+ vals = []
+ phash_pre = phashes[0]
+ for i,im in enumerate(frames):
+ if i == 0 or (phashes[i] - phashes[i-1]) > phash_thresh:
+ vals.append(fe.extract(im))
+ else:
+ vals.append(vals[i-1])
+ return vals
+
+
+def np2pil(im, swap=True):
+ """Ensure image is Pillow format
+ :param im: image in numpy or PIL.Image format
+ :returns: image in Pillow RGB format
+ """
+ try:
+ im.verify()
+ return im
+ except:
+ if swap:
+ im = cv.cvtColor(im,cv.COLOR_BGR2RGB)
+ return Image.fromarray(im.astype('uint8'), 'RGB')
+
+def pil2np(im, swap=True):
+ """Ensure image is Numpy.ndarry format
+ :param im: image in numpy or PIL.Image format
+ :returns: image in Numpy uint8 format
+ """
+ if type(im) == np.ndarray:
+ return im
+ im = np.asarray(im, np.uint8)
+ if swap:
+ im = cv.cvtColor(im, cv.COLOR_RGB2BGR)
+ return im
+
+
+def resize(im, width=0, height=0):
+ """resize image using imutils. Use w/h=[0 || None] to prioritize other edge size
+ :param im: a Numpy.ndarray image
+ :param wh: a tuple of (width, height)
+ """
+ # TODO change to cv.resize and add algorithm choices
+ w = width
+ h = height
+  if w == 0 and h == 0:
+ return im
+ elif w > 0 and h > 0:
+ ws = im.shape[1] / w
+ hs = im.shape[0] / h
+ if ws > hs:
+ return imutils.resize(im, width=w)
+ else:
+ return imutils.resize(im, height=h)
+  elif w > 0 and h == 0:
+ return imutils.resize(im, width=w)
+  elif w == 0 and h > 0:
+ return imutils.resize(im, height=h)
+ else:
+ return im
+
+def filter_pixellate(im,num_cells):
+ """Pixellate image by downsample then upsample
+ :param im: PIL.Image
+ :returns: PIL.Image
+ """
+ w,h = im.size
+ im = im.resize((num_cells,num_cells), Image.NEAREST)
+ im = im.resize((w,h), Image.NEAREST)
+ return im
+
+# Plot images inline using Matplotlib
+# def pltimg(im,title=None,mode='rgb',figsize=(8,12),dpi=160,output=None):
+# plt.figure(figsize=figsize)
+# plt.xticks([]),plt.yticks([])
+# if title is not None:
+# plt.title(title)
+# if mode.lower() == 'bgr':
+# im = cv.cvtColor(im,cv.COLOR_BGR2RGB)
+
+# f = plt.gcf()
+# if mode.lower() =='grey' or mode.lower() == 'gray':
+# plt.imshow(im,cmap='gray')
+# else:
+# plt.imshow(im)
+# plt.show()
+# plt.draw()
+# if output is not None:
+# bbox_inches='tight'
+# ext=osp.splitext(output)[1].replace('.','')
+# f.savefig(output,dpi=dpi,format=ext)
+# print('Image saved to: {}'.format(output))
+
+
+
+# Utilities for analyzing frames
+
+def compute_gray(im):
+ im = cv.cvtColor(im,cv.COLOR_BGR2GRAY)
+ n_vals = float(im.shape[0] * im.shape[1])
+ avg = np.sum(im[:]) / n_vals
+ return avg
+
+def compute_rgb(im):
+ im = cv.cvtColor(im,cv.COLOR_BGR2RGB)
+ n_vals = float(im.shape[0] * im.shape[1])
+ avg_r = np.sum(im[:,:,0]) / n_vals
+ avg_g = np.sum(im[:,:,1]) / n_vals
+ avg_b = np.sum(im[:,:,2]) / n_vals
+ avg_rgb = np.sum(im[:,:,:]) / (n_vals * 3.0)
+ return avg_r, avg_b, avg_g, avg_rgb
+
+def compute_hsv(im):
+ im = cv.cvtColor(im,cv.COLOR_BGR2HSV)
+ n_vals = float(im.shape[0] * im.shape[1])
+  avg_h = np.sum(im[:,:,0]) / n_vals
+  avg_s = np.sum(im[:,:,1]) / n_vals
+  avg_v = np.sum(im[:,:,2]) / n_vals
+  avg_hsv = np.sum(im[:,:,:]) / (n_vals * 3.0)
+ return avg_h, avg_s, avg_v, avg_hsv
+
+def pys_dhash(im, hashSize=8):
+ # resize the input image, adding a single column (width) so we
+ # can compute the horizontal gradient
+ resized = cv.resize(im, (hashSize + 1, hashSize))
+ # compute the (relative) horizontal gradient between adjacent
+ # column pixels
+ diff = resized[:, 1:] > resized[:, :-1]
+ # convert the difference image to a hash
+ return sum([2 ** i for (i, v) in enumerate(diff.flatten()) if v])
+
+
+############################################
+# ImageHash
+# pip install imagehash
+############################################
+
+
+def compute_ahash(im):
+ """Compute average hash using ImageHash library
+ :param im: Numpy.ndarray
+ :returns: Imagehash.ImageHash
+ """
+  return imagehash.average_hash(ensure_pil(im))
+
+def compute_phash(im):
+ """Compute perceptual hash using ImageHash library
+ :param im: Numpy.ndarray
+ :returns: Imagehash.ImageHash
+ """
+ return imagehash.phash(ensure_pil(im))
+
+def compute_dhash(im):
+ """Compute difference hash using ImageHash library
+ :param im: Numpy.ndarray
+ :returns: Imagehash.ImageHash
+ """
+ return imagehash.dhash(ensure_pil(im))
+
+def compute_whash(im):
+ """Compute wavelet hash using ImageHash library
+ :param im: Numpy.ndarray
+ :returns: Imagehash.ImageHash
+ """
+ return imagehash.whash(ensure_pil(im))
+
+def compute_whash_b64(im):
+ """Compute wavelest hash base64 using ImageHash library
+ :param im: Numpy.ndarray
+ :returns: Imagehash.ImageHash
+ """
+  return imagehash.whash(ensure_pil(im), mode='db4')
+
+
+############################################
+# Pillow
+############################################
+
+def sharpen(im):
+ """Sharpen image using PIL.ImageFilter
+ param: im: PIL.Image
+ returns: PIL.Image
+ """
+ im = ensure_pil(im)
+  im = im.filter(ImageFilter.SHARPEN)
+ return ensure_np(im)
+
+def fit_image(im,targ_size):
+ """Force fit image by cropping
+ param: im: PIL.Image
+ param: targ_size: a tuple of target (width, height)
+ returns: PIL.Image
+ """
+ im_pil = ensure_pil(im)
+ frame_pil = ImageOps.fit(im_pil, targ_size,
+ method=Image.BICUBIC, centering=(0.5, 0.5))
+ return ensure_np(frame_pil)
+
+
+
+
+############################################
+# scikit-learn
+############################################
+
+def compute_entropy(im):
+ # im is grayscale numpy
+ return entropy(im, disk(10))
+
+############################################
+# OpenCV
+############################################
+
+def bgr2gray(im):
+ """Wrapper for cv2.cvtColor transform
+ :param im: Numpy.ndarray (BGR)
+ :returns: Numpy.ndarray (Gray)
+ """
+ return cv.cvtColor(im,cv.COLOR_BGR2GRAY)
+
+def gray2bgr(im):
+ """Wrapper for cv2.cvtColor transform
+ :param im: Numpy.ndarray (Gray)
+ :returns: Numpy.ndarray (BGR)
+ """
+ return cv.cvtColor(im,cv.COLOR_GRAY2BGR)
+
+def bgr2rgb(im):
+ """Wrapper for cv2.cvtColor transform
+ :param im: Numpy.ndarray (BGR)
+ :returns: Numpy.ndarray (RGB)
+ """
+ return cv.cvtColor(im,cv.COLOR_BGR2RGB)
+
+def rgb2bgr(im):
+ """Wrapper for cv2.cvtColor transform
+  :param im: Numpy.ndarray (RGB)
+  :returns: Numpy.ndarray (BGR)
+ """
+ return cv.cvtColor(im,cv.COLOR_RGB2BGR)
+
+def compute_laplacian(im):
+ # below 100 is usually blurry
+ return cv.Laplacian(im, cv.CV_64F).var()
+
+
+# http://radjkarl.github.io/imgProcessor/index.html#
+
+def modifiedLaplacian(img):
+ ''''LAPM' algorithm (Nayar89)'''
+ M = np.array([-1, 2, -1])
+ G = cv.getGaussianKernel(ksize=3, sigma=-1)
+ Lx = cv.sepFilter2D(src=img, ddepth=cv.CV_64F, kernelX=M, kernelY=G)
+ Ly = cv.sepFilter2D(src=img, ddepth=cv.CV_64F, kernelX=G, kernelY=M)
+ FM = np.abs(Lx) + np.abs(Ly)
+ return cv.mean(FM)[0]
+
+def varianceOfLaplacian(img):
+ ''''LAPV' algorithm (Pech2000)'''
+ lap = cv.Laplacian(img, ddepth=-1)#cv.cv.CV_64F)
+ stdev = cv.meanStdDev(lap)[1]
+ s = stdev[0]**2
+ return s[0]
+
+def tenengrad(img, ksize=3):
+ ''''TENG' algorithm (Krotkov86)'''
+ Gx = cv.Sobel(img, ddepth=cv.CV_64F, dx=1, dy=0, ksize=ksize)
+ Gy = cv.Sobel(img, ddepth=cv.CV_64F, dx=0, dy=1, ksize=ksize)
+ FM = Gx**2 + Gy**2
+ return cv.mean(FM)[0]
+
+def normalizedGraylevelVariance(img):
+ ''''GLVN' algorithm (Santos97)'''
+ mean, stdev = cv.meanStdDev(img)
+ s = stdev[0]**2 / mean[0]
+ return s[0]
+
+def is_blank(im,width=100,sigma=0,thresh_canny=.1,thresh_mean=4,mask=None):
+  # im is a grayscale numpy image
+ #im = imutils.resize(im,width=width)
+ #mask = imutils.resize(mask,width=width)
+ if mask is not None:
+ im_canny = feature.canny(im,sigma=sigma,mask=mask)
+ total = len(np.where(mask > 0)[0])
+ else:
+ im_canny = feature.canny(im,sigma=sigma)
+ total = (im.shape[0]*im.shape[1])
+ n_white = len(np.where(im_canny > 0)[0])
+ per = n_white/total
+ if np.mean(im) < thresh_mean or per < thresh_canny:
+ return 1
+ else:
+ return 0
+
+
+def print_timing(t,n):
+ t = time.time()-t
+ print('Elapsed time: {:.2f}'.format(t))
+ print('FPS: {:.2f}'.format(n/t))
+
+def vid2frames(fpath, limit=5000, width=None, idxs=None):
+ """Convert a video file into list of frames
+ :param fpath: filepath to the video file
+ :param limit: maximum number of frames to read
+  :param width: if set, resize each frame to this width
+  :param idxs: the indices of frames to keep (rest are skipped)
+  :returns: list of Numpy.ndarray frames
+ """
+ frames = []
+ try:
+ cap = cv.VideoCapture(fpath)
+ except:
+ print('[-] Error. Could not read video file: {}'.format(fpath))
+ try:
+ cap.release()
+ except:
+ pass
+ return frames
+
+ fps = cap.get(cv.CAP_PROP_FPS)
+ nframes = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
+
+ if idxs is not None:
+ # read sample indices by seeking to frame index
+ for idx in idxs:
+ cap.set(cv.CAP_PROP_POS_FRAMES, idx)
+ res, frame = cap.read()
+ if width is not None:
+ frame = imutils.resize(frame, width=width)
+ frames.append(frame)
+ else:
+    while len(frames) < limit:
+ res, frame = cap.read()
+ if not res:
+ break
+ if width is not None:
+ frame = imutils.resize(frame, width=width)
+ frames.append(frame)
+
+ cap.release()
+ del cap
+ #return fps,nframes,frames
+ return frames
+
+def convolve_filter(vals,filters=[1]):
+ for k in filters:
+ vals_tmp = np.zeros_like(vals)
+ t = len(vals_tmp)
+ for i,v in enumerate(vals):
+ sum_vals = vals[max(0,i-k):min(t-1,i+k)]
+ vals_tmp[i] = np.mean(sum_vals)
+ vals = vals_tmp.copy()
+ return vals
+
+def cosine_delta(v1,v2):
+ return 1.0 - cosine_similarity(v1.reshape((1, -1)), v2.reshape((1, -1)))[0][0]
+
+
+
+def compute_edges(vals):
+ # find edges (1 = rising, -1 = falling)
+ edges = np.zeros_like(vals)
+ for i in range(len(vals[1:])):
+ delta = vals[i] - vals[i-1]
+ if delta == -1:
+ edges[i] = 1 # rising edge 0 --> 1
+ elif delta == 1:
+ edges[i+1] = 2 # falling edge 1 --> 0
+ # get index for rise fall
+ rising = np.where(np.array(edges) == 1)[0]
+ falling = np.where(np.array(edges) == 2)[0]
+ return rising, falling
+
+
+############################################
+# Point, Rect
+############################################
+
+class Point(object):
+ def __init__(self, x, y):
+ self.x = x
+ self.y = y
+
+class Rect(object):
+ def __init__(self, p1, p2):
+ '''Store the top, bottom, left and right values for points
+ p1 and p2 are the (corners) in either order
+ '''
+ self.left = min(p1.x, p2.x)
+ self.right = max(p1.x, p2.x)
+ self.top = min(p1.y, p2.y)
+ self.bottom = max(p1.y, p2.y)
+
+def overlap(r1, r2):
+ '''Overlapping rectangles overlap both horizontally & vertically
+ '''
+ return range_overlap(r1.left, r1.right, r2.left, r2.right) and \
+ range_overlap(r1.top, r1.bottom, r2.top, r2.bottom)
+
+def range_overlap(a_min, a_max, b_min, b_max):
+ '''Neither range is completely greater than the other
+ '''
+ return (a_min <= b_max) and (b_min <= a_max)
+
+def merge_rects(r1,r2):
+ p1 = Point(min(r1.left,r2.left),min(r1.top,r2.top))
+ p2 = Point(max(r1.right,r2.right),max(r1.bottom,r2.bottom))
+ return Rect(p1,p2)
+
+def is_overlapping(r1,r2):
+ """r1,r2 as [x1,y1,x2,y2] list"""
+ r1x = Rect(Point(r1[0],r1[1]),Point(r1[2],r1[3]))
+ r2x = Rect(Point(r2[0],r2[1]),Point(r2[2],r2[3]))
+ return overlap(r1x,r2x)
+
+def get_rects_merged(rects,bounds,expand=0):
+ """rects: list of points in [x1,y1,x2,y2] format"""
+ rects_expanded = []
+ bx,by = bounds
+ # expand
+ for x1,y1,x2,y2 in rects:
+ x1 = max(0,x1-expand)
+ y1 = max(0,y1-expand)
+ x2 = min(bx,x2+expand)
+ y2 = min(by,y2+expand)
+ rects_expanded.append(Rect(Point(x1,y1),Point(x2,y2)))
+
+ #rects_expanded = [Rect(Point(x1,y1),Point(x2,y2)) for x1,y1,x2,y2 in rects_expanded]
+ rects_merged = []
+ for i,r in enumerate(rects_expanded):
+ found = False
+ for j,rm in enumerate(rects_merged):
+ if overlap(r,rm):
+ rects_merged[j] = merge_rects(r,rm) #expand
+ found = True
+ if not found:
+ rects_merged.append(r)
+ # convert back to [x1,y1,x2,y2] format
+ rects_merged = [(r.left,r.top,r.right,r.bottom) for r in rects_merged]
+ # contract
+ rects_contracted = []
+ for x1,y1,x2,y2 in rects_merged:
+ x1 = min(bx,x1+expand)
+ y1 = min(by,y1+expand)
+ x2 = max(0,x2-expand)
+ y2 = max(0,y2-expand)
+ rects_contracted.append((x1,y1,x2,y2))
+
+ return rects_contracted
+
+
+############################################
+# Image display
+############################################
+
+
+def montage(frames,ncols=4,nrows=None,width=None):
+ """Convert list of frames into a grid montage
+ param: frames: list of frames as Numpy.ndarray
+ param: ncols: number of columns
+ param: width: resize images to this width before adding to grid
+ returns: Numpy.ndarray grid of all images
+ """
+
+ # expand image size if not enough frames
+ if nrows is not None and len(frames) < ncols * nrows:
+ blank = np.zeros_like(frames[0])
+ n = ncols * nrows - len(frames)
+ for i in range(n): frames.append(blank)
+
+ rows = []
+ for i,im in enumerate(frames):
+ if width is not None:
+ im = imutils.resize(im,width=width)
+ h,w = im.shape[:2]
+ if i % ncols == 0:
+ if i > 0:
+ rows.append(ims)
+ ims = []
+ ims.append(im)
+ if len(ims) > 0:
+ for j in range(ncols-len(ims)):
+ ims.append(np.zeros_like(im))
+ rows.append(ims)
+ row_ims = []
+ for row in rows:
+ row_im = np.hstack(np.array(row))
+ row_ims.append(row_im)
+ contact_sheet = np.vstack(np.array(row_ims))
+ return contact_sheet
+
+
+def make_np_im(wh, color=(0,0,0)):
+ '''Creates Numpy image
+ :param wh: (int, int) width height
+ :param color: (int, int, int) in RGB
+ '''
+ w,h = wh
+ im = np.ones([h,w,3], dtype=np.uint8)
+ im[:] = color[::-1]
+  return im
\ No newline at end of file
diff --git a/cli/app/utils/logger_utils.py b/cli/app/utils/logger_utils.py
new file mode 100644
index 0000000..f7c9eec
--- /dev/null
+++ b/cli/app/utils/logger_utils.py
@@ -0,0 +1,68 @@
+"""
+Logger instantiator for use with Click utility scripts
+"""
+import sys
+import os
+import logging
+
+import colorlog
+
+from app.settings import app_cfg as cfg
+
+
+class Logger:
+
+ logger_name = 'MEGAPIXELS'
+
+ def __init__(self):
+ pass
+
+ @staticmethod
+ def create(verbosity=4, logfile=None):
+ """Configures a logger from click params
+ :param verbosity: (int) between 0 and 5
+ :param logfile: (str) path to logfile
+ :returns: logging root object
+ """
+
+ loglevel = (5 - (max(0, min(verbosity, 5)))) * 10 # where logging.DEBUG = 10
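+    # verbosity mapping: 5 -> NOTSET, 4 -> DEBUG, 3 -> INFO, 2 -> WARNING,
+    # 1 -> ERROR, 0 -> CRITICAL (and the logger is disabled below)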
+ date_format = '%Y-%m-%d %H:%M:%S'
+ if 'colorlog' in sys.modules and os.isatty(2):
+ cformat = '%(log_color)s' + cfg.LOGFILE_FORMAT
+ f = colorlog.ColoredFormatter(cformat, date_format,
+ log_colors = { 'DEBUG' : 'yellow', 'INFO' : 'white',
+ 'WARNING' : 'bold_yellow', 'ERROR': 'bold_red',
+ 'CRITICAL': 'bold_red' })
+ else:
+ f = logging.Formatter(cfg.LOGFILE_FORMAT, date_format)
+
+ # logger = logging.getLogger(Logger.logger_name)
+ logger = logging.getLogger(cfg.LOGGER_NAME)
+ logger.setLevel(loglevel)
+
+ if logfile:
+ # create file handler which logs even debug messages
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(loglevel)
+ logger.addHandler(fh)
+
+ # add colored handler
+ ch = logging.StreamHandler()
+ ch.setFormatter(f)
+ logger.addHandler(ch)
+
+ if verbosity == 0:
+ logger.disabled = True
+
+ # test
+ # logger.debug('Hello Debug')
+ # logger.info('Hello Info')
+ # logger.warn('Hello Warn')
+ # logger.error('Hello Error')
+ # logger.critical('Hello Critical')
+
+ return logger
+
+ @staticmethod
+ def getLogger():
+    return logging.getLogger(cfg.LOGGER_NAME)
\ No newline at end of file
diff --git a/cli/app/utils/plot_utils.py b/cli/app/utils/plot_utils.py
new file mode 100644
index 0000000..5bbb8ac
--- /dev/null
+++ b/cli/app/utils/plot_utils.py
@@ -0,0 +1,149 @@
+import sys
+from os.path import join
+import time
+import random
+from pathlib import Path
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+import matplotlib.animation
+from mpl_toolkits.mplot3d import Axes3D
+from matplotlib import cbook
+from matplotlib import cm
+from matplotlib import animation
+
+
+
+# ---------------------------------------------------------------------------
+#
+# Matplotlib drawing functions
+#
+# ---------------------------------------------------------------------------
+
+# Generate random hex colors
+def rhex():
+ r = lambda: random.randint(0,255)
+ return '#%02X%02X%02X' % (r(), r(), r())
+
+ # line weight
+def generate_3d_landmark_anim(lm, fp_out, num_frames=30, fps=12, dpi=72, size=(480,480),
+ stroke_weight=2, mark_size=10, mark_type='.', bg_clr=(0,0,0), transparent=False):
+ '''Generates animated 3D plot of face landmarks
+ '''
+
+ # convert opencv BGR numpy image to RGB
+ bg_clr_hex = '#%02x%02x%02x' % bg_clr
+ #mark_clr = '#%02x%02x%02x' % mark_clr
+
+ # center x,y,z
+ xmm = (np.min(lm[:,0]),np.max(lm[:,0]))
+ ymm = (np.min(lm[:,1]),np.max(lm[:,1]))
+ zmm = (np.min(lm[:,2]),np.max(lm[:,2]))
+
+ # make copy of landmarks
+ lm_orig = lm.copy()
+ xmm = (np.min(lm_orig[:,0]),np.max(lm_orig[:,0]))
+ ymm = (np.min(lm_orig[:,1]),np.max(lm_orig[:,1]))
+ zmm = (np.min(lm_orig[:,2]),np.max(lm_orig[:,2]))
+
+ # swap the y and z components to improve 3d rotation angles for matplotlib
+ lm = np.zeros_like(lm_orig).astype(np.uint8)
+ for i,p in enumerate(lm_orig):
+ x,y,z = p
+ lm[i] = np.array([x - xmm[0], z - zmm[0], y - ymm[0]])
+
+ # Create plot
+ figsize = (size[0]/dpi, size[1]/dpi )
+ fig = plt.figure(figsize=figsize, dpi=dpi) # frameon=False
+ fig.tight_layout()
+ # remove whitespace in matplotlib
+ fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=None, hspace=None)
+ ax = fig.add_subplot(111, projection='3d')
+ ax.set_facecolor(bg_clr_hex) # background color
+
+ xscale, yscale, zscale = (1.2, 1.0, 1.0)
+
+ # scatter plot the dots
+
+ # jaw line
+ mark_clr = '#%02x%02x%02x' % (0,255,0) # green
+ ax.plot3D(lm[:17,0]*1.2,lm[:17,1], lm[:17,2],
+ marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # stage-right eyebrow
+  mark_clr = '#%02x%02x%02x' % (255,0,0) # red
+ ax.plot3D(lm[17:22,0]*1.2,lm[17:22,1],lm[17:22,2],
+ marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # stage-left eyebrow
+ mark_clr = '#%02x%02x%02x' % (255,255,0) # yellow
+ ax.plot3D(lm[22:27,0]*1.2,lm[22:27,1],lm[22:27,2],
+ marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # nose ridge
+ mark_clr = '#%02x%02x%02x' % (0,0,255) # blue
+ ax.plot3D(lm[27:31,0]*1.2,lm[27:31,1],lm[27:31,2],
+ marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # nose-bottom
+ mark_clr = '#%02x%02x%02x' % (255,0,255) # magenta
+ ax.plot3D(lm[31:36,0]*1.2,lm[31:36,1],lm[31:36,2],
+ marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # stage-left eye
+ mark_clr = '#%02x%02x%02x' % (0,255,255) # cyan
+ px, py, pz = lm[36:42,0]*1.2,lm[36:42,1],lm[36:42,2]
+ px = np.append(px, lm[36,0]*1.2)
+ py = np.append(py, lm[36,1])
+ pz = np.append(pz, lm[36,2])
+ ax.plot3D(px, py, pz, marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # stage-right eye
+ mark_clr = '#%02x%02x%02x' % (255,255,255) # white
+ px, py, pz = lm[42:48,0]*1.2,lm[42:48,1],lm[42:48,2]
+ px = np.append(px, lm[42,0]*1.2)
+ py = np.append(py, lm[42,1])
+ pz = np.append(pz, lm[42,2])
+ ax.plot3D(px, py, pz, marker=mark_type, markersize=mark_size, color=mark_clr,linewidth=stroke_weight)
+
+ # mouth
+ mark_clr = '#%02x%02x%02x' % (255,125,0) # orange?
+ px, py, pz = lm[48:,0]*1.2,lm[48:,1],lm[48:,2]
+ px = np.append(px, lm[48,0]*1.2)
+ py = np.append(py, lm[48,1])
+ pz = np.append(pz, lm[48,2])
+ ax.plot3D(px, py, pz, marker=mark_type, markersize=mark_size, color=mark_clr, linewidth=stroke_weight)
+
+ #rh = '#00ff00' # edge color
+ #ax.scatter(lm[:,0]*xscale,lm[:,1]*yscale,lm[:,2]*zscale, c=rh, alpha=1.0, s=35, edgecolor=rh)
+ #ax.scatter(lm[:,0]*xscale,lm[:,1]*yscale,lm[:,2]*zscale, c=rh, alpha=1.0, s=1)
+
+ # center center x,y,z points
+ cx = ((xmm[0] - xmm[1]) // 2) + xmm[1]
+ cy = ((ymm[1] - ymm[0]) // 2) + ymm[0]
+ cz = ((zmm[1] - zmm[0]) // 2) + zmm[0]
+
+ # remove ticks
+ ax.set_xticks([])
+ ax.set_yticks([])
+ ax.set_zticks([])
+
+ # remove axis
+ ax.set_frame_on(False)
+ ax.set_axis_off()
+
+ # set initial plot view
+ ax.view_init(elev=120., azim=70.)
+
+ # rotation increments: from 0 to 360 in num_frames
+ phi = np.linspace(0, 2*np.pi, num_frames)
+
+ # animation instruction
+ def update(phi):
+ ax.view_init(180,phi*180./np.pi)
+
+ ani = matplotlib.animation.FuncAnimation(fig, update, frames=phi)
+
+ savefig_kwargs = {'pad_inches': 0, 'transparent': transparent}
+  ani.save(fp_out, writer='imagemagick', fps=fps, savefig_kwargs=savefig_kwargs)
\ No newline at end of file
diff --git a/cli/app/utils/sheet_utils.py b/cli/app/utils/sheet_utils.py
new file mode 100644
index 0000000..85f979c
--- /dev/null
+++ b/cli/app/utils/sheet_utils.py
@@ -0,0 +1,82 @@
+import os
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+from app.settings import app_cfg as cfg
+
+def fetch_spreadsheet():
+ """Open the Google Spreadsheet, which contains the individual worksheets"""
+ scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
+ path = os.path.dirname(os.path.abspath(__file__))
+ credentials = ServiceAccountCredentials.from_json_keyfile_name(cfg.GOOGLE_ACCOUNT_CREDS_PATH, scope)
+ docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc"
+ client = gspread.authorize(credentials)
+ spreadsheet = client.open_by_key(docid)
+ return spreadsheet
+
+def fetch_worksheet(name):
+ """Get a reference to a particular "worksheet" from the Google Spreadsheet"""
+ spreadsheet = fetch_spreadsheet()
+ return spreadsheet.worksheet(name)
+
+def fetch_google_sheet(name):
+ """Get all the values from a particular worksheet as a list of lists.
+ Returns:
+ :keys - the first row of the document
+ :lines - a list of lists with the rest of the rows"""
+ rows = fetch_worksheet(name).get_all_values()
+ keys = rows[0]
+ lines = rows[1:]
+ return keys, lines
+
+def fetch_google_sheet_objects(name):
+ """Get all the values from a worksheet as a list of dictionaries"""
+ keys, rows = fetch_google_sheet(name)
+ recs = []
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ recs.append(rec)
+ return recs
+
+def fetch_google_lookup(name, item_key='key'):
+ """Get all the values from a worksheet as a dictionary of dictionaries.
+ Specify which field you want to use as the dictionary key."""
+ keys, rows = fetch_google_sheet(name)
+ lookup = {}
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ lookup[rec[item_key]] = rec
+ return lookup
+
+def fetch_verified_paper_lookup():
+ """Fetch a lookup keyed by dataset, where each dataset points to a hash of valid or invalid papers..."""
+ keys, rows = fetch_google_sheet('verifications')
+ verified_lookup = {}
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ if rec['dataset'] not in verified_lookup:
+ verified_lookup[rec['dataset']] = {}
+ if str(rec['uses_dataset']) == '1':
+ verified_lookup[rec['dataset']][rec['paper_id']] = rec
+ return verified_lookup
+
+def update_or_append_worksheet(name, form):
+ """Update a row if it exists, create it if it doesn't"""
+ worksheet = fetch_worksheet(name)
+ keys = worksheet.row_values(1)
+ row = [ form[key] if key in form else '' for key in keys ]
+ try:
+ cell = worksheet.find(form['paper_id'])
+ except:
+ cell = None
+ if cell:
+ for i, item in enumerate(row):
+ worksheet.update_cell(cell.row, i+1, item)
+ else:
+ worksheet.append_row(row)