diff options
Diffstat (limited to 'megapixels/commands/datasets/download_images.py')
| -rw-r--r-- | megapixels/commands/datasets/download_images.py | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/megapixels/commands/datasets/download_images.py b/megapixels/commands/datasets/download_images.py index c64afbba..45ca8f6e 100644 --- a/megapixels/commands/datasets/download_images.py +++ b/megapixels/commands/datasets/download_images.py @@ -6,9 +6,9 @@ import click help='Input') @click.option('-o', '--output', 'opt_fp_out', required=True, help='Output') -@click.option('-t', '--threads', 'opt_threads', default=8, +@click.option('-t', '--threads', 'opt_threads', default=8, show_default=True, help='Number of threads') -@click.option('--wayback', 'opt_wayback', is_flag=True, +@click.option('--wayback', 'opt_wayback', is_flag=True, default=False, help='Check Wayback archive for URL and download cached image') @click.pass_context def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback): @@ -52,7 +52,7 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback): estr = str(e) if item['opt_wayback']: if 'HTTP Error' in estr: - # check + # TODO add/parse/handle request for wayback machine archive url_wayback = url_wayback_base + item['url'] fp_error = f'{fp_out}_error.txt' with open(fp_error, 'w') as fp: @@ -67,6 +67,7 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback): pool_items = [] + log.debug(f'Initializing multithreaded pool...') for x in tqdm(records): fp_dst = join(opt_fp_out, x['filepath']) fp_dst_is_file = Path(fp_dst).is_file() @@ -75,7 +76,7 @@ def cli(ctx, opt_fp_in, opt_fp_out, opt_threads, opt_wayback): pool_items.append({'url':x['url'], 'filepath': fp_dst, 'opt_wayback': opt_wayback}) num_items = len(pool_items) - log.info(f'processing {num_items:,} items') + log.info(f'Going to download {num_items:,} files') pool_results = [] # run the multithreading with progress bar |
