1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
"""
Start with this file to process a folder of images
- Converts a folder (and, optionally, its subdirectories) of images into a CSV with each file's path split into subdir, filename and extension
"""
import click
from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg
from app.utils.logger_utils import Logger
# Module-level logger used by the `cli` command for status and error messages.
log = Logger.getLogger()
@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
    help='Input directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
    help='Output file for file meta CSV')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
    help='Slice list of files')
@click.option('--recursive/--no-recursive', 'opt_recursive', is_flag=True, default=False,
    help='Use glob recursion (slower)')
@click.option('-t', '--threads', 'opt_threads', default=4,
    help='Number of threads')
@click.option('-f', '--force', 'opt_force', is_flag=True,
    help='Force overwrite file')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out, opt_slice, opt_recursive, opt_threads, opt_force):
    """Index a directory of images into a CSV of file attributes.

    Globs the input directory for jpg/png files and writes one CSV row per
    image holding its subdirectory (relative to the input directory), its
    filename without extension, and its extension without the dot.

    The --threads option is accepted but currently unused.
    """
    # Imports stay function-local so they are only loaded when the command
    # actually runs.
    from glob import glob
    from os.path import join
    from pathlib import Path

    import pandas as pd
    from tqdm import tqdm

    from app.utils import file_utils

    # Refuse to clobber an existing output file unless explicitly forced.
    if not opt_force and Path(opt_fp_out).exists():
        log.error('File exists. Use "-f / --force" to overwite')
        return

    # Collect image paths, all jpg matches first and then all png matches.
    # The pattern shape depends only on the recursion flag, so pick it once.
    log.info(f'Globbing {opt_fp_in}')
    pattern_fmt = '**/*.{}' if opt_recursive else '*.{}'
    fp_ims = []
    for ext in ['jpg', 'png']:
        fp_glob = join(opt_fp_in, pattern_fmt.format(ext))
        fp_ims += glob(fp_glob, recursive=opt_recursive)

    if not fp_ims:
        log.warn('No images. Try with "--recursive"')
        return

    # With the default (None, None) this slice is a no-op that keeps all files.
    if opt_slice:
        fp_ims = fp_ims[opt_slice[0]:opt_slice[1]]

    log.info('Processing {:,} images'.format(len(fp_ims)))

    # Split every path into the attributes stored in the CSV.
    data = []
    for fp_im in tqdm(fp_ims):
        fpp_im = Path(fp_im)
        data.append({
            'subdir': str(fpp_im.parent.relative_to(opt_fp_in)),
            'fn': fpp_im.stem,
            'ext': fpp_im.suffix.replace('.', ''),
        })

    # Save to CSV, with the row number written as a column named "index".
    # NOTE(review): file_utils.mkdirs presumably creates the parent directory
    # of the output path - confirm against app.utils.file_utils.
    file_utils.mkdirs(opt_fp_out)
    df = pd.DataFrame.from_dict(data)
    df.index.name = 'index'
    df.to_csv(opt_fp_out)
|