summary refs log tree commit diff
path: root/megapixels/commands/datasets/download_images.py
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/commands/datasets/download_images.py')
-rw-r--r-- megapixels/commands/datasets/download_images.py | 9
1 files changed, 5 insertions, 4 deletions
diff --git a/megapixels/commands/datasets/download_images.py b/megapixels/commands/datasets/download_images.py
index c64afbba..45ca8f6e 100644
--- a/megapixels/commands/datasets/download_images.py
+++ b/megapixels/commands/datasets/download_images.py
@@ -6,9 +6,9 @@ import click
help='Input')
@click.option('-o', '--output', 'opt_fp_out', required=True,
help='Output')
-@click.option('-t', '--threads', 'opt_threads', default=8,
+@click.option('-t', '--threads', 'opt_threads', default=8, show_default=True,
help='Number of threads')
-@click.option('--wayback', 'opt_wayback', is_flag=True,
+@click.option('--wayback', 'opt_wayback', is_flag=True, default=False,
help='Check Wayback archive for URL and download cached image')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback):
@@ -52,7 +52,7 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback):
estr = str(e)
if item['opt_wayback']:
if 'HTTP Error' in estr:
- # check
+ # TODO add/parse/handle request for wayback machine archive
url_wayback = url_wayback_base + item['url']
fp_error = f'{fp_out}_error.txt'
with open(fp_error, 'w') as fp:
@@ -67,6 +67,7 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback):
pool_items = []
+ log.debug(f'Initializing multithreaded pool...')
for x in tqdm(records):
fp_dst = join(opt_fp_out, x['filepath'])
fp_dst_is_file = Path(fp_dst).is_file()
@@ -75,7 +76,7 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback):
pool_items.append({'url':x['url'], 'filepath': fp_dst, 'opt_wayback': opt_wayback})
num_items = len(pool_items)
- log.info(f'processing {num_items:,} items')
+ log.info(f'Going to download {num_items:,} files')
pool_results = []
# run the multithreading with progress bar