From 45855c051f415e56306f116a921eefe284139694 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 7 Jan 2020 18:18:38 +0100 Subject: fetch list of images and dedupe --- cli/app/utils/cortex_utils.py | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'cli/app/utils') diff --git a/cli/app/utils/cortex_utils.py b/cli/app/utils/cortex_utils.py index 328b93a..8076fda 100644 --- a/cli/app/utils/cortex_utils.py +++ b/cli/app/utils/cortex_utils.py @@ -7,24 +7,49 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) from app.settings import app_cfg def api_url(path): + """Generate a base API path""" return "https://lens.neural.garden/api/{}/".format(path) def fetch_cortex_folder(opt_folder_id): + """Fetch all new, non-generated files in a Cortex folder""" rows = fetch_json(api_url('file'), folder_id=opt_folder_id) fp_out_dir = join(app_cfg.DIR_INPUTS, "cortex", str(opt_folder_id)) os.makedirs(fp_out_dir, exist_ok=True) for row in rows: if row['generated'] == 0 and row['processed'] != 1: fn, ext = os.path.splitext(row['name']) - fp_out_image = join(fp_out_dir, "{}{}".format(row['id'], ext)) + fp_out_image = join(fp_out_dir, row['name']) if not os.path.exists(fp_out_image): + row['path'] = fp_out_image fetch_file(row['url'], fp_out_image) + return rows + +def find_unprocessed_files(files): + """Find files that haven't been processed yet. + This is implied if no matching generated file is found. 
+ """ + datasets = {} + unprocessed_files = [] + for file in files: + if file['generated'] == 1: + fn, ext = os.path.splitext(file['name']) + dataset = fn.split('-')[0] + datasets[dataset] = file['id'] + for file in files: + if file['generated'] == 0 and file['processed'] != 1: + fn, ext = os.path.splitext(file['name']) + dataset = fn.split('-')[0] + if dataset not in datasets: + unprocessed_files.append(file) + return unprocessed_files def fetch_json(url, **kwargs): + """HTTP GET some JSON""" resp = requests.get(url, params=kwargs, verify=False, timeout=10) return None if resp.status_code != 200 else resp.json() def fetch_file(url, fn, **kwargs): + """HTTP GET a binary file and write it to disk""" print("Fetch {} => {}".format(url, fn)) try: resp = requests.get(url, params=kwargs, verify=False, timeout=10) @@ -41,6 +66,7 @@ def fetch_file(url, fn, **kwargs): return size def upload_fp_to_cortex(opt_folder_id, fp): + """Upload an open file/BytesIO object""" files = { 'file': fp } @@ -52,13 +78,14 @@ def upload_fp_to_cortex(opt_folder_id, fp): 'datatype': 'image', } url = os.path.join(api_url('folder'), opt_folder_id, 'upload/') - print(url) r = requests.post(url, files=files, data=data) - print(r.json()) + return None if r.status_code != 200 else r.json() def upload_bytes_to_cortex(opt_folder_id, fn, fp, mimetype): - upload_fp_to_cortex(opt_folder_id, (fn, fp.getvalue(), mimetype,)) + """Upload a BytesIO object""" + return upload_fp_to_cortex(opt_folder_id, (fn, fp.getvalue(), mimetype,)) def upload_file_to_cortex(opt_folder_id, fn): + """Upload a file from disk""" with open(fn, 'rb') as fp: - upload_fp_to_cortex(opt_folder_id, fp) + return upload_fp_to_cortex(opt_folder_id, fp) -- cgit v1.2.3-70-g09d2