Diffstat (limited to 'megapixels/app/utils/file_utils.py')
-rw-r--r--   megapixels/app/utils/file_utils.py   64
1 file changed, 58 insertions, 6 deletions
diff --git a/megapixels/app/utils/file_utils.py b/megapixels/app/utils/file_utils.py
index 773667b1..5c7b39d1 100644
--- a/megapixels/app/utils/file_utils.py
+++ b/megapixels/app/utils/file_utils.py
@@ -40,10 +40,16 @@ log = logging.getLogger(cfg.LOGGER_NAME)
 # File I/O read/write little helpers
 # ------------------------------------------
 
-def glob_multi(dir_in, exts):
+def glob_multi(dir_in, exts, recursive=False):
     files = []
-    for e in exts:
-        files.append(glob(join(dir_in, '*.{}'.format(e))))
+    for ext in exts:
+        if recursive:
+            fp_glob = join(dir_in, '**/*.{}'.format(ext))
+            log.info(f'glob {fp_glob}')
+            files += glob(fp_glob, recursive=True)
+        else:
+            fp_glob = join(dir_in, '*.{}'.format(ext))
+            files += glob(fp_glob)
     return files
 
 
@@ -77,7 +83,7 @@ def load_csv(fp_in, as_list=True):
     :returns: list of all CSV data
     """
     if not Path(fp_in).exists():
-        log.info('loading {}'.format(fp_in))
+        log.info('not found: {}'.format(fp_in))
     log.info('loading: {}'.format(fp_in))
     with open(fp_in, 'r') as fp:
         items = csv.DictReader(fp)
@@ -86,6 +92,50 @@ def load_csv(fp_in, as_list=True):
     log.info('returning {:,} items'.format(len(items)))
     return items
 
+def unfussy_csv_reader(reader):
+    """Yields rows from a csv.reader, skipping rows that fail to parse
+    :param reader: csv.reader wrapped by load_csv_safe; rows raising csv.Error are dropped
+    """
+    while True:
+        try:
+            yield next(reader)
+        except StopIteration:
+            return
+        except csv.Error as e:
+            # log the parse error and skip the bad row
+            log.warning('csv parse error: {}'.format(e))
+            continue
+
+def load_csv_safe(fp_in, keys=True, create=False):
+    """Loads a CSV while ignoring possible data errors
+    :param fp_in: string filepath to CSV file
+    :param keys: boolean, set to False if the first line is not a header row
+    :param create: boolean, set to True to return empty keys/values if the CSV does not exist
+    """
+    try:
+        with open(fp_in, 'r', newline='', encoding='utf-8') as f:
+            # reader = csv.reader( (line.replace('\0','') for line in f) )
+            reader = csv.reader(f)
+            lines = list(unfussy_csv_reader(reader))
+        if keys:
+            keys = lines[0]
+            lines = lines[1:]
+            return keys, lines
+        return lines
+    except Exception:
+        if create:
+            if keys:
+                return [], []
+            return []
+        raise
+
+def load_recipe(fp_in):
+    """Loads a JSON file as an object with properties accessible with dot syntax
+    :param fp_in: string filepath to JSON file
+    """
+    with open(fp_in) as fh:
+        return json.load(fh, object_hook=lambda d: collections.namedtuple('X', d.keys())(*d.values()))
+
 
 def lazywrite(data, fp_out, sort_keys=True):
     """Writes JSON or Pickle data"""
@@ -175,7 +225,7 @@ def write_pickle(data, fp_out, ensure_path=True):
         pickle.dump(data, fp)
 
 
-def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True):
+def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True, verbose=False):
     """ """
     if ensure_path:
@@ -185,6 +235,8 @@ def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True):
         json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys)
     else:
         json.dump(data, fp, indent=2, sort_keys=sort_keys)
+    if verbose:
+        log.info('Wrote JSON: {}'.format(fp_out))
 
 def write_csv(data, fp_out, header=None):
     """ """
@@ -277,7 +329,7 @@ def sha256(fp_in, block_size=65536):
     """
     sha256 = hashlib.sha256()
     with open(fp_in, 'rb') as fp:
-        for block in iter(lambda: f.read(block_size), b''):
+        for block in iter(lambda: fp.read(block_size), b''):
             sha256.update(block)
     return sha256.hexdigest()
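A minimal usage sketch for the helpers touched by this commit; the import path
(app.utils.file_utils) and the sample file paths are assumptions for illustration,
not confirmed by the commit itself:

    # Hypothetical usage; import path and file paths are assumed
    from app.utils.file_utils import glob_multi, load_csv_safe, load_recipe

    # recurse into subdirectories for each extension (new recursive flag)
    fps = glob_multi('datasets/images', ['jpg', 'png'], recursive=True)
    print('{:,} images found'.format(len(fps)))

    # tolerant CSV load: malformed rows are skipped, and with create=True
    # a missing file yields empty header/rows instead of raising
    header, rows = load_csv_safe('datasets/meta.csv', create=True)

    # JSON config with dot access via namedtuples, e.g. recipe.name
    recipe = load_recipe('configs/recipe.json')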
