diff options
| author | Adam Harvey <adam@ahprojects.com> | 2018-12-23 01:37:03 +0100 |
|---|---|---|
| committer | Adam Harvey <adam@ahprojects.com> | 2018-12-23 01:37:03 +0100 |
| commit | 4452e02e8b04f3476273574a875bb60cfbb4568b (patch) | |
| tree | 3ffa44f9621b736250a8b94da14a187dc785c2fe /megapixels/app/utils | |
| parent | 2a65f7a157bd4bace970cef73529867b0e0a374d (diff) | |
| parent | 5340bee951c18910fd764241945f1f136b5a22b4 (diff) | |
.
Diffstat (limited to 'megapixels/app/utils')
| -rw-r--r-- | megapixels/app/utils/file_utils.py | 64 | ||||
| -rw-r--r-- | megapixels/app/utils/im_utils.py | 37 |
2 files changed, 86 insertions, 15 deletions
diff --git a/megapixels/app/utils/file_utils.py b/megapixels/app/utils/file_utils.py index 773667b1..5c7b39d1 100644 --- a/megapixels/app/utils/file_utils.py +++ b/megapixels/app/utils/file_utils.py @@ -40,10 +40,16 @@ log = logging.getLogger(cfg.LOGGER_NAME) # File I/O read/write little helpers # ------------------------------------------ -def glob_multi(dir_in, exts): +def glob_multi(dir_in, exts, recursive=False): files = [] - for e in exts: - files.append(glob(join(dir_in, '*.{}'.format(e)))) + for ext in exts: + if recursive: + fp_glob = join(dir_in, '**/*.{}'.format(ext)) + log.info(f'glob {fp_glob}') + files += glob(fp_glob, recursive=True) + else: + fp_glob = join(dir_in, '*.{}'.format(ext)) + files += glob(fp_glob) return files @@ -77,7 +83,7 @@ def load_csv(fp_in, as_list=True): :returns: list of all CSV data """ if not Path(fp_in).exists(): - log.info('loading {}'.format(fp_in)) + log.info('not found: {}'.format(fp_in)) log.info('loading: {}'.format(fp_in)) with open(fp_in, 'r') as fp: items = csv.DictReader(fp) @@ -86,6 +92,50 @@ def load_csv(fp_in, as_list=True): log.info('returning {:,} items'.format(len(items))) return items +def unfussy_csv_reader(reader): + """Loads a CSV while ignoring possible data errors + :param reader: Special reader for load_csv_safe which ignores CSV parse errors + """ + while True: + try: + yield next(reader) + except StopIteration: + return + except csv.Error: + print(csv.Error) + # log the problem or whatever + continue + +def load_csv_safe(fp_in, keys=True, create=False): + """Loads a CSV while ignoring possible data errors + :param fp_in: string filepath to JSON file + :param keys: boolean set to false if the first line is not headers (for some reason) + :param create: boolean set to true to return an empty keys/values if the CSV does not exist + """ + try: + with open(fp_in, 'r', newline='', encoding='utf-8') as f: + # reader = csv.reader( (line.replace('\0','') for line in f) ) + reader = csv.reader(f) + lines = list(unfussy_csv_reader(reader)) + if keys: + keys = lines[0] + lines = lines[1:] + return keys, lines + return lines + except: + if create: + if keys: + return {}, [] + return [] + raise + +def load_recipe(fp_in): + """Loads a JSON file as an object with properties accessible with dot syntax + :param fp_in: string filepath to JSON file + """ + with open(path) as fh: + return json.load(fh, object_hook=lambda d: collections.namedtuple('X', d.keys())(*d.values())) + def lazywrite(data, fp_out, sort_keys=True): """Writes JSON or Pickle data""" @@ -175,7 +225,7 @@ def write_pickle(data, fp_out, ensure_path=True): pickle.dump(data, fp) -def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True): +def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True, verbose=False): """ """ if ensure_path: @@ -185,6 +235,8 @@ def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True): json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys) else: json.dump(data, fp, indent=2, sort_keys=sort_keys) + if verbose: + log.info('Wrote JSON: {}'.format(fp_out)) def write_csv(data, fp_out, header=None): """ """ @@ -277,7 +329,7 @@ def sha256(fp_in, block_size=65536): """ sha256 = hashlib.sha256() with open(fp_in, 'rb') as fp: - for block in iter(lambda: f.read(block_size), b''): + for block in iter(lambda: fp.read(block_size), b''): sha256.update(block) return sha256.hexdigest() diff --git a/megapixels/app/utils/im_utils.py b/megapixels/app/utils/im_utils.py index a0f23cd2..d5e92aa3 100644 --- a/megapixels/app/utils/im_utils.py +++ b/megapixels/app/utils/im_utils.py @@ -22,6 +22,16 @@ import datetime +def is_grayscale(im, threshold=5): + """Returns True if image is grayscale + :param im: (numpy.array) image + :return (bool) of if image is grayscale""" + b = im[:,:,0] + g = im[:,:,1] + mean = np.mean(np.abs(g - b)) + return mean < threshold + + def compute_features(fe,frames,phashes,phash_thresh=1): """ Get vector embedding using FeatureExtractor @@ -40,7 +50,7 @@ def compute_features(fe,frames,phashes,phash_thresh=1): return vals -def ensure_pil(im, bgr2rgb=False): +def np2pil(im, swap=True): """Ensure image is Pillow format :param im: image in numpy or PIL.Image format :returns: image in Pillow RGB format @@ -49,35 +59,44 @@ def ensure_pil(im, bgr2rgb=False): im.verify() return im except: - if bgr2rgb: + if swap: im = cv.cvtColor(im,cv.COLOR_BGR2RGB) return Image.fromarray(im.astype('uint8'), 'RGB') -def ensure_np(im): +def pil2np(im, swap=True): """Ensure image is Numpy.ndarry format :param im: image in numpy or PIL.Image format :returns: image in Numpy uint8 format """ if type(im) == np.ndarray: - return im - return np.asarray(im, np.uint8) + return im + im = np.asarray(im, np.uint8) + if swap: + im = cv.cvtColor(im, cv.COLOR_RGB2BGR) + return im -def resize(im,width=0,height=0): +def resize(im, width=0, height=0): """resize image using imutils. Use w/h=[0 || None] to prioritize other edge size :param im: a Numpy.ndarray image :param wh: a tuple of (width, height) """ + # TODO change to cv.resize and add algorithm choices w = width h = height if w is 0 and h is 0: return im elif w > 0 and h > 0: - return imutils.resize(im,width=w,height=h) + ws = im.shape[1] / w + hs = im.shape[0] / h + if ws > hs: + return imutils.resize(im, width=w) + else: + return imutils.resize(im, height=h) elif w > 0 and h is 0: - return imutils.resize(im,width=w) + return imutils.resize(im, width=w) elif w is 0 and h > 0: - return imutils.resize(im,height=h) + return imutils.resize(im, height=h) else: return im |
