Diffstat (limited to 'megapixels/app/utils')
-rw-r--r--  megapixels/app/utils/file_utils.py  64
-rw-r--r--  megapixels/app/utils/im_utils.py    37
2 files changed, 86 insertions, 15 deletions
diff --git a/megapixels/app/utils/file_utils.py b/megapixels/app/utils/file_utils.py
index 773667b1..5c7b39d1 100644
--- a/megapixels/app/utils/file_utils.py
+++ b/megapixels/app/utils/file_utils.py
@@ -40,10 +40,16 @@ log = logging.getLogger(cfg.LOGGER_NAME)
# File I/O read/write little helpers
# ------------------------------------------
-def glob_multi(dir_in, exts):
+def glob_multi(dir_in, exts, recursive=False):
   files = []
-  for e in exts:
-    files.append(glob(join(dir_in, '*.{}'.format(e))))
+  for ext in exts:
+    if recursive:
+      fp_glob = join(dir_in, '**/*.{}'.format(ext))
+      log.info(f'glob {fp_glob}')
+      files += glob(fp_glob, recursive=True)
+    else:
+      fp_glob = join(dir_in, '*.{}'.format(ext))
+      files += glob(fp_glob)
   return files
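
For reference, a minimal usage sketch of the new recursive mode (the directory and extensions here are illustrative):

  # hypothetical call: collect jpg/png files under data/frames and its subdirectories
  images = glob_multi('data/frames', ['jpg', 'png'], recursive=True)
  # with recursive=True, the '**' pattern also matches files directly in dir_in
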
@@ -77,7 +83,7 @@ def load_csv(fp_in, as_list=True):
   :returns: list of all CSV data
   """
   if not Path(fp_in).exists():
-    log.info('loading {}'.format(fp_in))
+    log.info('not found: {}'.format(fp_in))
   log.info('loading: {}'.format(fp_in))
   with open(fp_in, 'r') as fp:
     items = csv.DictReader(fp)
@@ -86,6 +92,50 @@ def load_csv(fp_in, as_list=True):
   log.info('returning {:,} items'.format(len(items)))
   return items
+def unfussy_csv_reader(reader):
+  """Yields rows from a CSV reader, skipping rows that raise parse errors
+  :param reader: a csv.reader instance; used by load_csv_safe
+  """
+  while True:
+    try:
+      yield next(reader)
+    except StopIteration:
+      return
+    except csv.Error as e:
+      # log the parse error and skip the malformed row
+      log.warning('csv parse error: {}'.format(e))
+      continue
+
+def load_csv_safe(fp_in, keys=True, create=False):
+  """Loads a CSV while ignoring malformed rows
+  :param fp_in: string filepath to CSV file
+  :param keys: boolean, set to False if the file has no header row
+  :param create: boolean, set to True to return empty keys/values if the CSV does not exist
+  """
+  try:
+    with open(fp_in, 'r', newline='', encoding='utf-8') as f:
+      # reader = csv.reader( (line.replace('\0','') for line in f) )
+      reader = csv.reader(f)
+      lines = list(unfussy_csv_reader(reader))
+      if keys:
+        keys = lines[0]
+        lines = lines[1:]
+        return keys, lines
+      return lines
+  except (OSError, IndexError):
+    if create:
+      if keys:
+        return [], []
+      return []
+    raise
+
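
A minimal sketch of the intended call pattern (file path is illustrative); with create=True a missing or empty file yields empty results instead of raising:

  # hypothetical usage: malformed rows are silently skipped by unfussy_csv_reader
  header, rows = load_csv_safe('data/records.csv', keys=True, create=True)
  records = [dict(zip(header, row)) for row in rows]
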
+def load_recipe(fp_in):
+  """Loads a JSON file as an object with properties accessible with dot syntax
+  :param fp_in: string filepath to JSON file
+  """
+  import collections  # local import, in case collections is not imported at module top
+  with open(fp_in) as fh:
+    return json.load(fh, object_hook=lambda d: collections.namedtuple('X', d.keys())(*d.values()))
+
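
The namedtuple object_hook converts every JSON object into a tuple whose fields are the object's keys, so nested values read with dot syntax; a sketch with assumed file contents:

  # assuming recipe.json contains {"output": {"width": 1280}}
  recipe = load_recipe('recipe.json')
  width = recipe.output.width  # 1280

Note this only works when every key in the JSON is a valid Python identifier.
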
 def lazywrite(data, fp_out, sort_keys=True):
   """Writes JSON or Pickle data"""
@@ -175,7 +225,7 @@ def write_pickle(data, fp_out, ensure_path=True):
     pickle.dump(data, fp)
-def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True):
+def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True, verbose=False):
   """
   """
   if ensure_path:
@@ -185,6 +235,8 @@ def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True):
       json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys)
     else:
       json.dump(data, fp, indent=2, sort_keys=sort_keys)
+  if verbose:
+    log.info('Wrote JSON: {}'.format(fp_out))
 def write_csv(data, fp_out, header=None):
   """ """
@@ -277,7 +329,7 @@ def sha256(fp_in, block_size=65536):
   """
   sha256 = hashlib.sha256()
   with open(fp_in, 'rb') as fp:
-    for block in iter(lambda: f.read(block_size), b''):
+    for block in iter(lambda: fp.read(block_size), b''):
       sha256.update(block)
   return sha256.hexdigest()
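
The change above fixes a NameError: the lambda closed over an undefined name f instead of the open file handle fp. Usage is unchanged (the path here is illustrative):

  # hypothetical call: hash a file in 64 KB blocks
  digest = sha256('data/frame_0001.jpg')
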
diff --git a/megapixels/app/utils/im_utils.py b/megapixels/app/utils/im_utils.py
index a0f23cd2..d5e92aa3 100644
--- a/megapixels/app/utils/im_utils.py
+++ b/megapixels/app/utils/im_utils.py
@@ -22,6 +22,16 @@ import datetime
+def is_grayscale(im, threshold=5):
+  """Returns True if image is grayscale
+  :param im: (numpy.ndarray) image in BGR channel order
+  :param threshold: (int) maximum mean channel difference still considered grayscale
+  :returns: (bool) True if image is grayscale
+  """
+  # cast to signed ints so the subtraction cannot wrap around uint8
+  b = im[:,:,0].astype(np.int16)
+  g = im[:,:,1].astype(np.int16)
+  mean = np.mean(np.abs(g - b))
+  return mean < threshold
+
+
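
A usage sketch (filename illustrative); the check assumes a 3-channel BGR array such as cv.imread returns:

  im = cv.imread('frame_0001.jpg')  # hypothetical path; BGR uint8
  if im is not None and is_grayscale(im):
    log.info('frame appears to be grayscale')
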
 def compute_features(fe,frames,phashes,phash_thresh=1):
   """
   Get vector embedding using FeatureExtractor
@@ -40,7 +50,7 @@ def compute_features(fe,frames,phashes,phash_thresh=1):
   return vals
-def ensure_pil(im, bgr2rgb=False):
+def np2pil(im, swap=True):
   """Ensure image is Pillow format
   :param im: image in numpy or PIL.Image format
   :returns: image in Pillow RGB format
@@ -49,35 +59,44 @@ def ensure_pil(im, bgr2rgb=False):
     im.verify()
     return im
   except:
-    if bgr2rgb:
+    if swap:
       im = cv.cvtColor(im,cv.COLOR_BGR2RGB)
     return Image.fromarray(im.astype('uint8'), 'RGB')
-def ensure_np(im):
+def pil2np(im, swap=True):
   """Ensure image is numpy.ndarray format
   :param im: image in numpy or PIL.Image format
   :returns: image in numpy uint8 format
   """
   if type(im) == np.ndarray:
-    return im
-  return np.asarray(im, np.uint8)
+    return im
+  im = np.asarray(im, np.uint8)
+  if swap:
+    im = cv.cvtColor(im, cv.COLOR_RGB2BGR)
+  return im
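
A round-trip sketch of the renamed converters; swap=True assumes OpenCV-style BGR arrays on the numpy side (the path is illustrative):

  im_bgr = cv.imread('frame.jpg')  # hypothetical path; BGR uint8
  pil_im = np2pil(im_bgr)          # BGR -> RGB, then numpy -> PIL
  im_out = pil2np(pil_im)          # PIL -> numpy, then RGB -> BGR
  assert im_out.shape == im_bgr.shape
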
-def resize(im,width=0,height=0):
+def resize(im, width=0, height=0):
   """Resize image using imutils, preserving aspect ratio
   :param im: a numpy.ndarray image
   :param width: target width; use 0 to size by height only
   :param height: target height; use 0 to size by width only
   """
+  # TODO: change to cv.resize and add interpolation choices
   w = width
   h = height
   if w == 0 and h == 0:
     return im
   elif w > 0 and h > 0:
-    return imutils.resize(im,width=w,height=h)
+    ws = im.shape[1] / w
+    hs = im.shape[0] / h
+    if ws > hs:
+      return imutils.resize(im, width=w)
+    else:
+      return imutils.resize(im, height=h)
   elif w > 0 and h == 0:
-    return imutils.resize(im,width=w)
+    return imutils.resize(im, width=w)
   elif w == 0 and h > 0:
-    return imutils.resize(im,height=h)
+    return imutils.resize(im, height=h)
   else:
     return im
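
With both bounds set, the new branch compares the two scale factors and resizes along whichever edge overshoots more, so the result fits inside width x height; a sketch with illustrative dimensions (assumes numpy imported as np, as in this module):

  im = np.zeros((480, 640, 3), np.uint8)  # h=480, w=640
  out = resize(im, width=320, height=320)
  # ws = 640/320 = 2.0 > hs = 480/320 = 1.5, so width wins: out is 320x240
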