Diffstat (limited to 'megapixels/app/utils')
-rw-r--r--  megapixels/app/utils/file_utils.py  64
-rw-r--r--  megapixels/app/utils/im_utils.py    37
2 files changed, 86 insertions, 15 deletions
diff --git a/megapixels/app/utils/file_utils.py b/megapixels/app/utils/file_utils.py
index 773667b1..5c7b39d1 100644
--- a/megapixels/app/utils/file_utils.py
+++ b/megapixels/app/utils/file_utils.py
@@ -40,10 +40,16 @@ log = logging.getLogger(cfg.LOGGER_NAME)
# File I/O read/write little helpers
# ------------------------------------------
-def glob_multi(dir_in, exts):
+def glob_multi(dir_in, exts, recursive=False):
   files = []
-  for e in exts:
-    files.append(glob(join(dir_in, '*.{}'.format(e))))
+  for ext in exts:
+    if recursive:
+      fp_glob = join(dir_in, '**/*.{}'.format(ext))
+      log.info(f'glob {fp_glob}')
+      files += glob(fp_glob, recursive=True)
+    else:
+      fp_glob = join(dir_in, '*.{}'.format(ext))
+      files += glob(fp_glob)
   return files
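
For reference, a minimal usage sketch of the new recursive mode (the directory and extensions here are illustrative):

  # hypothetical call: collect jpg/png files under data/frames and its subdirectories
  images = glob_multi('data/frames', ['jpg', 'png'], recursive=True)
  # with recursive=True, the '**' pattern also matches files directly in dir_in
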
@@ -77,7 +83,7 @@ def load_csv(fp_in, as_list=True):
   :returns: list of all CSV data
   """
   if not Path(fp_in).exists():
-    log.info('loading {}'.format(fp_in))
+    log.info('not found: {}'.format(fp_in))
   log.info('loading: {}'.format(fp_in))
   with open(fp_in, 'r') as fp:
     items = csv.DictReader(fp)
@@ -86,6 +92,50 @@ def load_csv(fp_in, as_list=True):
   log.info('returning {:,} items'.format(len(items)))
   return items
+def unfussy_csv_reader(reader):
+  """Yields rows from a CSV reader, skipping rows that raise parse errors
+  :param reader: a csv.reader instance; used by load_csv_safe
+  """
+  while True:
+    try:
+      yield next(reader)
+    except StopIteration:
+      return
+    except csv.Error as e:
+      # log the parse error and skip the malformed row
+      log.warning('csv parse error: {}'.format(e))
+      continue
+
+def load_csv_safe(fp_in, keys=True, create=False):
+  """Loads a CSV while ignoring malformed rows
+  :param fp_in: string filepath to CSV file
+  :param keys: boolean, set to False if the file has no header row
+  :param create: boolean, set to True to return empty keys/values if the CSV does not exist
+  """
+  try:
+    with open(fp_in, 'r', newline='', encoding='utf-8') as f:
+      # reader = csv.reader( (line.replace('\0','') for line in f) )
+      reader = csv.reader(f)
+      lines = list(unfussy_csv_reader(reader))
+      if keys:
+        keys = lines[0]
+        lines = lines[1:]
+        return keys, lines
+      return lines
+  except (OSError, IndexError):
+    if create:
+      if keys:
+        return [], []
+      return []
+    raise
+
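
A minimal sketch of the intended call pattern (file path is illustrative); with create=True a missing or empty file yields empty results instead of raising:

  # hypothetical usage: malformed rows are silently skipped by unfussy_csv_reader
  header, rows = load_csv_safe('data/records.csv', keys=True, create=True)
  records = [dict(zip(header, row)) for row in rows]
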
+def load_recipe(fp_in):
+  """Loads a JSON file as an object with properties accessible with dot syntax
+  :param fp_in: string filepath to JSON file
+  """
+  import collections  # local import, in case collections is not imported at module top
+  with open(fp_in) as fh:
+    return json.load(fh, object_hook=lambda d: collections.namedtuple('X', d.keys())(*d.values()))
+
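
The namedtuple object_hook converts every JSON object into a tuple whose fields are the object's keys, so nested values read with dot syntax; a sketch with assumed file contents:

  # assuming recipe.json contains {"output": {"width": 1280}}
  recipe = load_recipe('recipe.json')
  width = recipe.output.width  # 1280

Note this only works when every key in the JSON is a valid Python identifier.
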
 def lazywrite(data, fp_out, sort_keys=True):
   """Writes JSON or Pickle data"""
@@ -175,7 +225,7 @@ def write_pickle(data, fp_out, ensure_path=True):
     pickle.dump(data, fp)
-def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True):
+def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True, verbose=False):
   """
   """
   if ensure_path:
@@ -185,6 +235,8 @@ def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True):
       json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys)
     else:
       json.dump(data, fp, indent=2, sort_keys=sort_keys)
+  if verbose:
+    log.info('Wrote JSON: {}'.format(fp_out))
 def write_csv(data, fp_out, header=None):
   """ """
@@ -277,7 +329,7 @@ def sha256(fp_in, block_size=65536):
   """
   sha256 = hashlib.sha256()
   with open(fp_in, 'rb') as fp:
-    for block in iter(lambda: f.read(block_size), b''):
+    for block in iter(lambda: fp.read(block_size), b''):
       sha256.update(block)
   return sha256.hexdigest()
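
The change above fixes a NameError: the lambda closed over an undefined name f instead of the open file handle fp. Usage is unchanged (the path here is illustrative):

  # hypothetical call: hash a file in 64 KB blocks
  digest = sha256('data/frame_0001.jpg')
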
diff --git a/megapixels/app/utils/im_utils.py b/megapixels/app/utils/im_utils.py
index a0f23cd2..d5e92aa3 100644
--- a/megapixels/app/utils/im_utils.py
+++ b/megapixels/app/utils/im_utils.py
@@ -22,6 +22,16 @@ import datetime
+def is_grayscale(im, threshold=5):
+  """Returns True if image is grayscale
+  :param im: (numpy.ndarray) image in BGR channel order
+  :param threshold: (int) maximum mean channel difference still considered grayscale
+  :returns: (bool) True if image is grayscale
+  """
+  # cast to signed ints so the subtraction cannot wrap around uint8
+  b = im[:,:,0].astype(np.int16)
+  g = im[:,:,1].astype(np.int16)
+  mean = np.mean(np.abs(g - b))
+  return mean < threshold
+
+
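
A usage sketch (filename illustrative); the check assumes a 3-channel BGR array such as cv.imread returns:

  im = cv.imread('frame_0001.jpg')  # hypothetical path; BGR uint8
  if im is not None and is_grayscale(im):
    log.info('frame appears to be grayscale')
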
 def compute_features(fe,frames,phashes,phash_thresh=1):
   """
   Get vector embedding using FeatureExtractor
@@ -40,7 +50,7 @@ def compute_features(fe,frames,phashes,phash_thresh=1):
   return vals
-def ensure_pil(im, bgr2rgb=False):
+def np2pil(im, swap=True):
   """Ensure image is Pillow format
   :param im: image in numpy or PIL.Image format
   :returns: image in Pillow RGB format
@@ -49,35 +59,44 @@ def ensure_pil(im, bgr2rgb=False):
     im.verify()
     return im
   except:
-    if bgr2rgb:
+    if swap:
       im = cv.cvtColor(im,cv.COLOR_BGR2RGB)
     return Image.fromarray(im.astype('uint8'), 'RGB')
-def ensure_np(im):
+def pil2np(im, swap=True):
   """Ensure image is numpy.ndarray format
   :param im: image in numpy or PIL.Image format
   :returns: image in numpy uint8 format
   """
   if type(im) == np.ndarray:
-    return im
-  return np.asarray(im, np.uint8)
+    return im
+  im = np.asarray(im, np.uint8)
+  if swap:
+    im = cv.cvtColor(im, cv.COLOR_RGB2BGR)
+  return im
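
A round-trip sketch of the renamed converters; swap=True assumes OpenCV-style BGR arrays on the numpy side (the path is illustrative):

  im_bgr = cv.imread('frame.jpg')  # hypothetical path; BGR uint8
  pil_im = np2pil(im_bgr)          # BGR -> RGB, then numpy -> PIL
  im_out = pil2np(pil_im)          # PIL -> numpy, then RGB -> BGR
  assert im_out.shape == im_bgr.shape
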
-def resize(im,width=0,height=0):
+def resize(im, width=0, height=0):
   """Resize image using imutils, preserving aspect ratio
   :param im: a numpy.ndarray image
   :param width: target width; use 0 to size by height only
   :param height: target height; use 0 to size by width only
   """
+  # TODO: change to cv.resize and add interpolation choices
   w = width
   h = height
   if w == 0 and h == 0:
     return im
   elif w > 0 and h > 0:
-    return imutils.resize(im,width=w,height=h)
+    ws = im.shape[1] / w
+    hs = im.shape[0] / h
+    if ws > hs:
+      return imutils.resize(im, width=w)
+    else:
+      return imutils.resize(im, height=h)
   elif w > 0 and h == 0:
-    return imutils.resize(im,width=w)
+    return imutils.resize(im, width=w)
   elif w == 0 and h > 0:
-    return imutils.resize(im,height=h)
+    return imutils.resize(im, height=h)
   else:
     return im
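
With both bounds set, the new branch compares the two scale factors and resizes along whichever edge overshoots more, so the result fits inside width x height; a sketch with illustrative dimensions (assumes numpy imported as np, as in this module):

  im = np.zeros((480, 640, 3), np.uint8)  # h=480, w=640
  out = resize(im, width=320, height=320)
  # ws = 640/320 = 2.0 > hs = 480/320 = 1.5, so width wins: out is 320x240
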