32 files changed, 4263 insertions, 0 deletions
@@ -1,3 +1,6 @@ +# Custom +3rdparty/ + # Project specific webpack-stats.dev.json @@ -1,3 +1,7 @@ +# MegaPixels + +FaceQuery.me, mozilla, nytimes + # megapixels dev ## installation @@ -29,3 +33,4 @@ aaaaiiiiVVVVVdddrrrrrrrraaaaiSSSQVdd%%&&rrrrrraaaaiiSQQQQ%Q%%%%rrrrr xxxxdddd55VVVddv7777~~~~aaaaiiiiVVddddrrrrrrrraaaaiiiiVVVVdVdddd7777 xxxxdddd5555vvvv7777~~~~xxxxdiiiV555vvvv7777~~~~xxxxdddd5555vvvv7777 ``` +>>>>>>> 0dc3e40434c23e4d48119465f39b03bf35fb56bd diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..5f608f80 --- /dev/null +++ b/environment.yml @@ -0,0 +1,207 @@ +name: megapixels +channels: + - pytorch + - conda-forge + - alexbw + - defaults +dependencies: + - lua=5.3.2=1 + - lua-cwrap=0.1=lua5.3_2 + - lua-paths=0.1=lua5.3_1 + - lua-torch=7.0.3=lua5.3_0 + - luarocks=2.2.1=lua5.3_0 + - ca-certificates=2018.4.16=0 + - mpi=1.0=openmpi + - openmpi=3.1.0=h26a2512_3 + - _nb_ext_conf=0.4.0=py36_1 + - anaconda-client=1.6.14=py36_0 + - asn1crypto=0.24.0=py36_0 + - backcall=0.1.0=py36_0 + - blas=1.0=mkl + - bleach=2.1.3=py36_0 + - bzip2=1.0.6=h14c3975_5 + - cairo=1.14.12=h7636065_2 + - certifi=2018.4.16=py36_0 + - cffi=1.11.5=py36h9745a5d_0 + - chardet=3.0.4=py36h0f667ec_1 + - clyent=1.2.2=py36h7e57e65_1 + - cryptography=2.2.2=py36h14c3975_0 + - cudatoolkit=9.0=h13b8566_0 + - decorator=4.3.0=py36_0 + - entrypoints=0.2.3=py36h1aec115_2 + - ffmpeg=4.0=h04d0a96_0 + - fontconfig=2.12.6=h49f89f6_0 + - freetype=2.8=hab7d2ae_1 + - glib=2.56.1=h000015b_0 + - gmp=6.1.2=h6c8ec71_1 + - graphite2=1.3.11=h16798f4_2 + - harfbuzz=1.7.6=h5f0a787_1 + - hdf5=1.10.2=hba1933b_1 + - html5lib=1.0.1=py36h2f9c1c0_0 + - icu=58.2=h9c2bf20_1 + - idna=2.7=py36_0 + - intel-openmp=2018.0.3=0 + - ipykernel=4.8.2=py36_0 + - ipython=6.4.0=py36_0 + - ipython_genutils=0.2.0=py36hb52b0d5_0 + - ipywidgets=7.2.1=py36_0 + - jasper=1.900.1=hd497a04_4 + - jedi=0.12.0=py36_1 + - jinja2=2.10=py36ha16c418_0 + - jpeg=9b=h024ee3a_2 + - jsonschema=2.6.0=py36h006f8b5_0 + - jupyter_client=5.2.3=py36_0 + - jupyter_core=4.4.0=py36h7c827e3_0 + - libedit=3.1.20170329=h6b74fdf_2 + - libffi=3.2.1=hd88cf55_4 + - libgcc-ng=7.2.0=hdf63c60_3 + - libgfortran=3.0.0=1 + - libgfortran-ng=7.2.0=hdf63c60_3 + - libopencv=3.4.1=h1a3b859_1 + - libopus=1.2.1=hb9ed12e_0 + - libpng=1.6.34=hb9fc6fc_0 + - libprotobuf=3.5.2=h6f1eeef_0 + - libsodium=1.0.16=h1bed415_0 + - libstdcxx-ng=7.2.0=hdf63c60_3 + - libtiff=4.0.9=he85c1e1_1 + - libvpx=1.7.0=h439df22_0 + - libxcb=1.13=h1bed415_1 + - libxml2=2.9.8=h26e45fe_1 + - markupsafe=1.0=py36hd9260cd_1 + - mistune=0.8.3=py36h14c3975_1 + - mkl=2018.0.3=1 + - mkl-rt=11.1=p0 + - mkl_fft=1.0.1=py36h3010b51_0 + - mkl_random=1.0.1=py36h629b387_0 + - nb_anacondacloud=1.4.0=py36_0 + - nb_conda=2.2.1=py36h8118bb2_0 + - nb_conda_kernels=2.1.0=py36_0 + - nbconvert=5.3.1=py36hb41ffb7_0 + - nbformat=4.4.0=py36h31c9010_0 + - nbpresent=3.0.2=py36h5f95a39_1 + - ncurses=6.1=hf484d3e_0 + - ninja=1.8.2=py36h6bb024c_1 + - notebook=5.5.0=py36_0 + - numpy=1.14.5=py36hcd700cb_3 + - numpy-base=1.14.5=py36hdbf6ddf_3 + - olefile=0.45.1=py36_0 + - opencv=3.4.1=py36h6fd60c2_2 + - openssl=1.0.2o=h20670df_0 + - pandoc=2.2.1=h629c226_0 + - pandocfilters=1.4.2=py36ha6701b7_1 + - parso=0.2.1=py36_0 + - pcre=8.42=h439df22_0 + - pexpect=4.6.0=py36_0 + - pickleshare=0.7.4=py36h63277f8_0 + - pillow=5.1.0=py36h3deb7b8_0 + - pip=10.0.1=py36_0 + - pixman=0.34.0=hceecf20_3 + - prompt_toolkit=1.0.15=py36h17d85b1_0 + - ptyprocess=0.6.0=py36_0 + - py-opencv=3.4.1=py36h0676e08_1 + - pycparser=2.18=py36hf9f622e_1 + - 
pygments=2.2.0=py36h0d3125c_0 + - pyopenssl=18.0.0=py36_0 + - pysocks=1.6.8=py36_0 + - python=3.6.6=hc3d631a_0 + - python-dateutil=2.7.3=py36_0 + - pytz=2018.5=py36_0 + - pyyaml=3.12=py36hafb9ca4_1 + - pyzmq=17.0.0=py36h14c3975_0 + - readline=7.0=ha6073c6_4 + - requests=2.19.1=py36_0 + - send2trash=1.5.0=py36_0 + - setuptools=39.2.0=py36_0 + - simplegeneric=0.8.1=py36_2 + - six=1.11.0=py36h372c433_1 + - sqlite=3.24.0=h84994c4_0 + - terminado=0.8.1=py36_1 + - testpath=0.3.1=py36h8cadb63_0 + - tk=8.6.7=hc745277_3 + - tornado=5.0.2=py36_0 + - traitlets=4.3.2=py36h674d592_0 + - urllib3=1.23=py36_0 + - wcwidth=0.1.7=py36hdf4376a_0 + - webencodings=0.5.1=py36h800622e_1 + - wheel=0.31.1=py36_0 + - widgetsnbextension=3.2.1=py36_0 + - xz=5.2.4=h14c3975_4 + - yaml=0.1.7=had09818_2 + - zeromq=4.2.5=h439df22_0 + - zlib=1.2.11=ha838bed_2 + - cuda90=1.0=h6433d27_0 + - pytorch=0.4.0=py36_cuda9.0.176_cudnn7.1.2_1 + - torchvision=0.2.1=py36_1 + - pip: + - absl-py==0.2.2 + - astor==0.7.1 + - beautifulsoup4==4.6.0 + - blocksparse==1.0.0 + - bs4==0.0.1 + - cachetools==2.1.0 + - click==6.7 + - cloudpickle==0.5.3 + - cycler==0.10.0 + - cython==0.28.4 + - dask==0.18.1 + - dlib==19.15.0 + - flask==1.0.2 + - flask-cors==3.0.6 + - future==0.16.0 + - gast==0.2.0 + - google-api-core==1.4.1 + - google-auth==1.5.1 + - google-cloud-core==0.28.1 + - google-cloud-storage==1.13.0 + - google-images-download==2.3.0 + - google-resumable-media==0.3.1 + - googleapis-common-protos==1.6.0b6 + - grpcio==1.13.0 + - h5py==2.8.0 + - horovod==0.13.8 + - imagehash==4.0 + - imageio==2.3.0 + - imutils==0.4.6 + - itsdangerous==0.24 + - jonasz-master-thesis==0.1 + - keras==2.2.0 + - keras-applications==1.0.2 + - keras-preprocessing==1.0.1 + - kiwisolver==1.0.1 + - lmdb==0.94 + - markdown==2.6.11 + - matplotlib==2.2.2 + - moviepy==0.2.3.5 + - networkx==2.1 + - opencv-python==3.4.2.17 + - pandas==0.23.3 + - protobuf==3.6.0 + - pyasn1==0.4.4 + - pyasn1-modules==0.2.2 + - pyglet==1.3.2 + - pymediainfo==2.3.0 + - pyopengl==3.1.0 + - pyparsing==2.2.0 + - python-slugify==1.2.5 + - pywavefront==0.3.2 + - pywavelets==0.5.2 + - rsa==4.0 + - scikit-image==0.14.0 + - scikit-learn==0.19.2 + - scipy==1.1.0 + - selenium==3.13.0 + - tensorboard==1.8.0 + - tensorflow-gpu==1.8.0 + - termcolor==1.1.0 + - tflearn==0.3.2 + - toolz==0.9.0 + - toposort==1.5 + - torch==0.4.0 + - tqdm==4.23.4 + - unicode==2.6 + - unidecode==1.0.22 + - visvis==1.11.1 + - werkzeug==0.14.1 + - wikipedia==1.4.0 +prefix: /home/adam/anaconda3/envs/megapixels + diff --git a/megapixels/admin/commands/rsync.py b/megapixels/admin/commands/rsync.py new file mode 100644 index 00000000..a821b460 --- /dev/null +++ b/megapixels/admin/commands/rsync.py @@ -0,0 +1,106 @@ +""" +Parallel rsync media_records between drives +For parallel rsync with media records, use vframe/commands/rsync +""" + +import click + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg + +@click.command() +@click.option('-i', '--input', 'dir_in', required=True, + help='Input directory') +@click.option('-o', '--output', 'dir_out', required=True, + help='Output directory') +@click.option('-t', '--threads', 'opt_threads', default=8, + help='Number of threads') +@click.option('--validate/--no-validate', 'opt_validate', is_flag=True, default=False, + help='Validate files after copy') +@click.option('--extract/--no-extract', 'opt_extract', is_flag=True, default=False, + help='Extract files after copy') +@click.pass_context +def cli(ctx, dir_in, dir_out, opt_threads, 
opt_validate, opt_extract): + """rsync folders""" + + import os + from os.path import join + from pathlib import Path + + # NB deactivate logger in imported module + import logging + logging.getLogger().addHandler(logging.NullHandler()) + from parallel_sync import rsync + + from app.settings.paths import Paths + from app.utils import logger_utils, file_utils + + # ------------------------------------------------- + # process here + + log = logger_utils.Logger.getLogger() + log.info('RSYNC from {} to {}'.format(dir_in, dir_out)) + log.info('opt_extract: {}'.format(opt_extract)) + log.info('opt_validate: {}'.format(opt_validate)) + log.info('opt_threads: {}'.format(opt_validate)) + + file_utils.mkdirs(dir_out) + + rsync.copy(dir_in, dir_out, parallelism=opt_threads, + validate=opt_validate, extract=opt_extract) + + log.info('done rsyncing') + + + # --------------------------------------------------------------- + + + + # if dir_in: + # # use input filepath as source + # if not Path(dir_in).is_dir(): + # log.error('{} is not a directory'.format(dir_in)) + # ctx.exit() + # if not Path(dir_out).is_dir(): + # ctx.log.error('{} is not a directory'.format(dir_out)) + # return + + # log.info('RSYNC from {} to {}'.format(dir_in, dir_out)) + # log.debug('opt_validate: {}'.format(opt_validate)) + # log.debug('opt_extract: {}'.format(opt_extract)) + # # local_copy(paths, parallelism=10, extract=False, validate=False): + # file_utils.mkdirs(dir_out) + # rsync.copy(dir_in, dir_out, parallelism=opt_threads, + # validate=opt_validate, extract=opt_extract) + # else: + # log.debug('get paths') + # # use source mappings as rsync source + # if not opt_media_format: + # ctx.log.error('--media format not supplied for source mappings') + # return + + # # ensure FILEPATH metadata exists + # # parallel-rsync accepts a list of tupes (src, dst) + # file_routes = [] + # for chair_item in chair_items: + # item = chair_item.item + # sha256 = chair_item.item.sha256 + # filepath_metadata = item.get_metadata(types.Metadata.FILEPATH) + # if not filepath_metadata: + # ctx.log.error('no FILEPATH metadata') + # return + # fp_media = + # src = join('') + # dir_media = Paths.media_dir(opt_media_format, data_store=opt_disk, verified=ctx.opts['verified']) + # dst = join('') + # file_routes.append((src, dst)) + + # ctx.log.debug('dir_media: {}'.format(dir_media)) + # return + + # # ------------------------------------------------- + + # # send back to sink + # for chair_item in chair_items: + # sink.send(chair_item) diff --git a/megapixels/app/models/__init__.py b/megapixels/app/models/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/megapixels/app/models/__init__.py diff --git a/megapixels/app/models/bbox.py b/megapixels/app/models/bbox.py new file mode 100644 index 00000000..41b67416 --- /dev/null +++ b/megapixels/app/models/bbox.py @@ -0,0 +1,236 @@ +from dlib import rectangle as dlib_rectangle +import numpy as np + +class BBoxPoint: + + def __init__(self, x, y): + self._x = x + self._y = y + + @property + def x(self): + return self._x + + @property + def y(self): + return self._y + + def offset(self, x, y): + return (self._x + x, self._y + y) + + def tuple(self): + return (self._x, self._y) + + +class BBox: + + def __init__(self, x1, y1, x2, y2): + """Represents a bounding box and provides methods for accessing and modifying + :param x1: normalized left coord + :param y1: normalized top coord + :param x2: normalized right coord + :param y2: normalized bottom coord + """ + self._x1 = x1 + self._y1 = 
y1 + self._x2 = x2 + self._y2 = y2 + self._width = x2 - x1 + self._height = y2 - y1 + self._cx = x1 + (self._width // 2) + self._cy = y1 + (self._height // 2) + self._tl = (x1, y1) + self._br = (x2, y2) + self._rect = (self._x1, self._y1, self._x2, self._y2) + + + @property + def pt_tl(self): + return self._tl + + @property + def pt_br(self): + return self._br + + @property + def x(self): + return self._x1 + + @property + def y(self): + return self._y1 + + @property + def x1(self): + return self._x1 + + @property + def y1(self): + return self._y1 + + + @property + def x2(self): + return self._x2 + + @property + def y2(self): + return self._y2 + + @property + def height(self): + return self._height + + @property + def width(self): + return self._width + + @property + def h(self): + return self._height + + @property + def w(self): + return self._width + + @property + def cx(self): + return self._cx + + @property + def cy(self): + return self._cy + + # # ----------------------------------------------------------------- + # # Utils + + # def constrain(self, dim): + + + # ----------------------------------------------------------------- + # Modify + + def expand_dim(self, amt, dim): + """Expands BBox within dim + :param box: (tuple) left, top, right, bottom + :param dim: (tuple) width, height + :returns (BBox) in pixel dimensions + """ + # expand + rect_exp = list( (np.array(self._rect) + np.array([-amt, -amt, amt, amt])).astype('int')) + # outliers + oob = list(range(4)) + oob[0] = min(rect_exp[0], 0) + oob[1] = min(rect_exp[1], 0) + oob[2] = dim[0] - max(rect_exp[2], 2) + oob[3] = dim[1] - max(rect_exp[3], 3) + oob = np.array(oob) + oob[oob > 0] = 0 + # amount + oob = np.absolute(oob) + # threshold + rect_exp[0] = max(rect_exp[0], 0) + rect_exp[1] = max(rect_exp[1], 0) + rect_exp[2] = min(rect_exp[2], dim[0]) + rect_exp[3] = min(rect_exp[3], dim[1]) + # redistribute oob amounts + oob = np.array([-oob[2], -oob[3], oob[0], oob[1]]) + rect_exp = np.add(np.array(rect_exp), oob) + return BBox(*rect_exp) + + + # ----------------------------------------------------------------- + # Convert to + + def to_dim(self, dim): + """scale is (w, h) is tuple of dimensions""" + w, h = dim + rect = list((np.array(self._rect) * np.array([w, h, w, h])).astype('int')) + return BBox(*rect) + + def normalize(self, rect, dim): + w, h = dim + x1, y1, x2, y2 = rect + return (x1 / w, y1 / h, x2 / w, y2 / h) + + # ----------------------------------------------------------------- + # Format as + + def as_xyxy(self): + """Converts BBox back to x1, y1, x2, y2 rect""" + return (self._x1, self._y1, self._x2, self._y2) + + def as_xywh(self): + """Converts BBox back to haar type""" + return (self._x1, self._y1, self._width, self._height) + + def as_trbl(self): + """Converts BBox to CSS (top, right, bottom, left)""" + return (self._y1, self._x2, self._y2, self._x1) + + def as_dlib(self): + """Converts BBox to dlib rect type""" + return dlib.rectangle(self._x1, self._y1, self._x2, self._y2) + + def as_yolo(self): + """Converts BBox to normalized center x, center y, w, h""" + return (self._cx, self._cy, self._width, self._height) + + + # ----------------------------------------------------------------- + # Create from + + @classmethod + def from_xyxy_dim(cls, x1, y1, x2, y2, dim): + """Converts x1, y1, w, h to BBox and normalizes + :returns BBox + """ + rect = cls.normalize(cls, (x1, y1, x2, y2), dim) + return cls(*rect) + + @classmethod + def from_xywh_dim(cls, x, y, w, h, dim): + """Converts x1, y1, w, h to BBox and normalizes + 
:param rect: (list) x1, y1, w, h + :param dim: (list) w, h + :returns BBox + """ + rect = cls.normalize(cls, (x, y, x + w, y + h), dim) + return cls(*rect) + + @classmethod + def from_xywh(cls, x, y, w, h): + """Converts x1, y1, w, h to BBox + :param rect: (list) x1, y1, w, h + :param dim: (list) w, h + :returns BBox + """ + return cls(x, y, x+w, y+h) + + @classmethod + def from_css(cls, rect, dim): + """Converts rect from CSS (top, right, bottom, left) to BBox + :param rect: (list) x1, y1, x2, y2 + :param dim: (list) w, h + :returns BBox + """ + rect = (rect[3], rect[0], rect[1], rect[2]) + rect = cls.normalize(cls, rect, dim) + return cls(*rect) + + @classmethod + def from_dlib_dim(cls, rect, dim): + """Converts dlib.rectangle to BBox + :param rect: (list) x1, y1, x2, y2 + :param dim: (list) w, h + :returns dlib.rectangle + """ + rect = (rect.left(), rect.top(), rect.right(), rect.bottom()) + rect = cls.normalize(cls, rect, dim) + return cls(*rect) + + + def str(self): + """Return BBox as a string "x1, y1, x2, y2" """ + return self.as_box() + diff --git a/megapixels/app/models/click_factory.py b/megapixels/app/models/click_factory.py new file mode 100644 index 00000000..61a3b5e5 --- /dev/null +++ b/megapixels/app/models/click_factory.py @@ -0,0 +1,145 @@ +""" +Click processor factory +- Inspired by and used code from @wiretapped's HTSLAM codebase +- In particular the very useful +""" + +import os +import sys +from os.path import join +from pathlib import Path +import os +from os.path import join +import sys +from functools import update_wrapper, wraps +import itertools +from pathlib import Path +from glob import glob +import importlib +import logging + +import click +from app.settings import app_cfg as cfg + + +# -------------------------------------------------------- +# Click Group Class +# -------------------------------------------------------- + +# set global variable during parent class create +dir_plugins = None # set in create + +class ClickComplex: + """Wrapper generator for custom Click CLI's based on LR's coroutine""" + + def __init__(self): + pass + + + class CustomGroup(click.Group): + #global dir_plugins # from CliGenerator init + + # lists commands in plugin directory + def list_commands(self, ctx): + global dir_plugins # from CliGenerator init + rv = list(self.commands.keys()) + fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \ + if str(x).endswith('.py') \ + and '__init__' not in str(x)] + for fp_cmd in fp_cmds: + try: + assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name) + except Exception as ex: + logging.getLogger('app').error('{}'.format(ex)) + rv.append(fp_cmd.stem) + rv.sort() + return rv + + # Complex version: gets commands in directory and in this file + # Based on code from @wiretapped + HTSLAM + def get_command(self, ctx, cmd_name): + global dir_plugins + if cmd_name in self.commands: + return self.commands[cmd_name] + ns = {} + fpp_cmd = Path(dir_plugins, cmd_name + '.py') + fp_cmd = fpp_cmd.as_posix() + if not fpp_cmd.exists(): + sys.exit('[-] {} file does not exist'.format(fpp_cmd)) + code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec') + try: + eval(code, ns, ns) + except Exception as ex: + logging.getLogger('vframe').error('exception: {}'.format(ex)) + @click.command() + def _fail(): + raise Exception('while loading {}'.format(fpp_cmd.name)) + _fail.short_help = repr(ex) + _fail.help = repr(ex) + return _fail + if 'cli' not in ns: + sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd)) + 
return ns['cli'] + + @classmethod + def create(self, dir_plugins_local): + global dir_plugins + dir_plugins = dir_plugins_local + return self.CustomGroup + + + +class ClickSimple: + """Wrapper generator for custom Click CLI's""" + + def __init__(self): + pass + + + class CustomGroup(click.Group): + #global dir_plugins # from CliGenerator init + + # lists commands in plugin directory + def list_commands(self, ctx): + global dir_plugins # from CliGenerator init + rv = list(self.commands.keys()) + fp_cmds = [Path(x) for x in Path(dir_plugins).iterdir() \ + if str(x).endswith('.py') \ + and '__init__' not in str(x)] + for fp_cmd in fp_cmds: + assert fp_cmd.name not in rv, "[-] Error: {} can't exist in cli.py and {}".format(fp_cmd.name) + rv.append(fp_cmd.stem) + rv.sort() + return rv + + # Complex version: gets commands in directory and in this file + # from HTSLAM + def get_command(self, ctx, cmd_name): + global dir_plugins # from CliGenerator init + if cmd_name in self.commands: + return self.commands[cmd_name] + ns = {} + fpp_cmd = Path(dir_plugins, cmd_name + '.py') + fp_cmd = fpp_cmd.as_posix() + if not fpp_cmd.exists(): + sys.exit('[-] {} file does not exist'.format(fpp_cmd)) + code = compile(fpp_cmd.read_bytes(), fp_cmd, 'exec') + try: + eval(code, ns, ns) + except Exception as ex: + logging.getLogger('vframe').error('exception: {}'.format(ex)) + @click.command() + def _fail(): + raise Exception('while loading {}'.format(fpp_cmd.name)) + _fail.short_help = repr(ex) + _fail.help = repr(ex) + return _fail + if 'cli' not in ns: + sys.exit('[-] Error: {} does not contain a cli function'.format(fp_cmd)) + return ns['cli'] + + @classmethod + def create(self, dir_plugins_local): + global dir_plugins + dir_plugins = dir_plugins_local + return self.CustomGroup diff --git a/megapixels/app/processors/__init__.py b/megapixels/app/processors/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/megapixels/app/processors/__init__.py diff --git a/megapixels/app/processors/face_detector.py b/megapixels/app/processors/face_detector.py new file mode 100644 index 00000000..02d068dc --- /dev/null +++ b/megapixels/app/processors/face_detector.py @@ -0,0 +1,103 @@ +import os +from os.path import join +from pathlib import Path + +import cv2 as cv +import numpy as np +import dlib +# import imutils + +from app.utils import im_utils, logger_utils +from app.models.bbox import BBox +from app.settings import app_cfg as cfg + +class DetectorDLIBCNN: + + dnn_size = (300, 300) + pyramids = 0 + conf_thresh = 0.85 + + def __init__(self, opt_gpu): + self.log = logger_utils.Logger.getLogger() + cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '') + os.environ['CUDA_VISIBLE_DEVICES'] = str(opt_gpu) + self.log.info('load model: {}'.format(cfg.DIR_MODELS_DLIB_CNN)) + self.detector = dlib.cnn_face_detection_model_v1(cfg.DIR_MODELS_DLIB_CNN) + os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices # reset + + def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=None): + rois = [] + conf_thresh = self.conf_thresh if opt_conf_thresh is None else opt_conf_thresh + pyramids = self.pyramids if opt_pyramids is None else opt_pyramids + dnn_size = self.dnn_size if opt_size is None else opt_size + # resize image + im = im_utils.resize(im, width=dnn_size[0], height=dnn_size[1]) + dim = im.shape[:2][::-1] + im = im_utils.bgr2rgb(im) # convert to RGB for dlib + # run detector + mmod_rects = self.detector(im, 1) + # sort results + for mmod_rect in mmod_rects: + if mmod_rect.confidence > 
conf_thresh: + bbox = BBox.from_dlib_dim(mmod_rect.rect, dim) + rois.append(bbox) + return rois + + +class DetectorDLIBHOG: + + size = (320, 240) + pyramids = 0 + + def __init__(self): + self.detector = dlib.get_frontal_face_detector() + + def detect(self, im, opt_size=None, opt_conf_thresh=None, opt_pyramids=0): + conf_thresh = self.conf_thresh if opt_conf_thresh is None else opt_conf_thresh + dnn_size = self.size if opt_size is None else opt_size + pyramids = self.pyramids if opt_pyramids is None else opt_pyramids + + im = im_utils.resize(im, width=opt_size[0], height=opt_size[1]) + dim = im.shape[:2][::-1] + im = im_utils.bgr2rgb(im) # ? + hog_results = self.detector.run(im, pyramids) + + rois = [] + if len(hog_results[0]) > 0: + for rect, score, direction in zip(*hog_results): + if score > opt_conf_thresh: + bbox = BBox.from_dlib_dim(rect, dim) + rois.append(bbox) + return rois + +class DetectorCVDNN: + + dnn_scale = 1.0 # fixed + dnn_mean = (104.0, 177.0, 123.0) # fixed + dnn_crop = False # crop or force resize + size = (300, 300) + conf_thresh = 0.85 + + def __init__(self): + fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.prototxt') + fp_model = join(cfg.DIR_MODELS_CAFFE, 'face_detect', 'opencv_face_detector.caffemodel') + self.net = cv.dnn.readNet(fp_prototxt, fp_model) + self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) + self.net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) + + def detect(self, im, opt_size=None, opt_conf_thresh=None): + """Detects faces and returns (list) of (BBox)""" + conf_thresh = self.conf_thresh if opt_conf_thresh is None else opt_conf_thresh + dnn_size = self.size if opt_size is None else opt_size + im = cv.resize(im, dnn_size) + blob = cv.dnn.blobFromImage(im, self.dnn_scale, dnn_size, self.dnn_mean) + self.net.setInput(blob) + net_outputs = self.net.forward() + + rois = [] + for i in range(0, net_outputs.shape[2]): + conf = net_outputs[0, 0, i, 2] + if conf > opt_conf_thresh: + rect_norm = net_outputs[0, 0, i, 3:7] + rois.append(BBox(*rect_norm)) + return rois
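To make the detector API above concrete, here is a minimal usage sketch, assuming the Caffe prototxt/caffemodel files referenced in app_cfg.py are present on disk; the image paths are hypothetical:

```python
import cv2 as cv

from app.processors.face_detector import DetectorCVDNN

detector = DetectorCVDNN()
im = cv.imread('test/images/group.jpg')  # hypothetical test image

# detect() returns a list of BBox objects with normalized coordinates
for bbox in detector.detect(im, opt_conf_thresh=0.9):
    # scale to pixel space using the image (width, height)
    x1, y1, x2, y2 = bbox.to_dim(im.shape[:2][::-1]).as_xyxy()
    cv.rectangle(im, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

cv.imwrite('faces_out.jpg', im)
```

The dlib variants follow the same detect() contract; DetectorDLIBCNN additionally takes a GPU index at construction.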
\ No newline at end of file diff --git a/megapixels/app/settings/__init__.py b/megapixels/app/settings/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/megapixels/app/settings/__init__.py diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py new file mode 100644 index 00000000..739ddce2 --- /dev/null +++ b/megapixels/app/settings/app_cfg.py @@ -0,0 +1,90 @@ +import os +from os.path import join +import logging +import collections + +import cv2 as cv + +from app.settings import types +from app.utils import click_utils + + +# ----------------------------------------------------------------------------- +# Enun lists used for custom Click Params +# ----------------------------------------------------------------------------- + +FaceDetectNetVar = click_utils.ParamVar(types.FaceDetectNet) + +LogLevelVar = click_utils.ParamVar(types.LogLevel) + +# # data_store +DATA_STORE = '/data_store_hdd/' +DIR_DATASETS = join(DATA_STORE,'datasets') +DIR_APPS = join(DATA_STORE,'apps') +DIR_APP = join(DIR_APPS,'megapixels') +DIR_MODELS = join(DIR_APP,'models') + +# # Frameworks +DIR_MODELS_CAFFE = join(DIR_MODELS,'caffe') +DIR_MODELS_DARKNET = join(DIR_MODELS,'darknet') +DIR_MODELS_DARKNET_PJREDDIE = join(DIR_MODELS_DARKNET, 'pjreddie') +DIR_MODELS_PYTORCH = join(DIR_MODELS,'pytorch') +DIR_MODELS_TORCH = join(DIR_MODELS,'torch') +DIR_MODELS_MXNET = join(DIR_MODELS,'mxnet') +DIR_MODELS_TF = join(DIR_MODELS,'tensorflow') +DIR_MODELS_DLIB = join(DIR_MODELS,'dlib') +DIR_MODELS_DLIB_CNN = join(DIR_MODELS_DLIB, 'mmod_human_face_detector.dat') +DIR_MODELS_DLIB_5PT = join(DIR_MODELS_DLIB, 'shape_predictor_5_face_landmarks.dat') +DIR_MODELS_DLIB_68PT = join(DIR_MODELS_DLIB, 'shape_predictor_68_face_landmarks.dat') + + +# Test images +DIR_TEST_IMAGES = join(DIR_APP, 'test', 'images') + +# ----------------------------------------------------------------------------- +# Drawing, GUI settings +# ----------------------------------------------------------------------------- +DIR_ASSETS = join(DIR_APP, 'assets') +FP_FONT = join(DIR_ASSETS, 'font') + + +# ----------------------------------------------------------------------------- +# click chair settings +# ----------------------------------------------------------------------------- +DIR_COMMANDS_PROCESSOR_ADMIN = 'admin/commands' +DIR_COMMANDS_PROCESSOR_DATASETS = 'datasets/commands' + +# ----------------------------------------------------------------------------- +# Filesystem settings +# hash trees enforce a maximum number of directories per directory +# ----------------------------------------------------------------------------- +ZERO_PADDING = 6 # padding for enumerated image filenames +#FRAME_NAME_ZERO_PADDING = 6 # is this active?? 
+CKPT_ZERO_PADDING = 9 +HASH_TREE_DEPTH = 3 +HASH_BRANCH_SIZE = 3 + +# ----------------------------------------------------------------------------- +# Logging options exposed for custom click Params +# ----------------------------------------------------------------------------- +LOGGER_NAME = 'app' +LOGLEVELS = { + types.LogLevel.DEBUG: logging.DEBUG, + types.LogLevel.INFO: logging.INFO, + types.LogLevel.WARN: logging.WARN, + types.LogLevel.ERROR: logging.ERROR, + types.LogLevel.CRITICAL: logging.CRITICAL +} +LOGLEVEL_OPT_DEFAULT = types.LogLevel.DEBUG.name +#LOGFILE_FORMAT = "%(asctime)s: %(levelname)s: %(message)s" +#LOGFILE_FORMAT = "%(levelname)s:%(name)s: %(message)s" +#LOGFILE_FORMAT = "%(levelname)s: %(message)s" +#LOGFILE_FORMAT = "%(filename)s:%(lineno)s %(funcName)s() %(message)s" +# colored logs +""" +black, red, green, yellow, blue, purple, cyan and white. +{color}, fg_{color}, bg_{color}: Foreground and background colors. +bold, bold_{color}, fg_bold_{color}, bg_bold_{color}: Bold/bright colors. +reset: Clear all formatting (both foreground and background colors). +""" +LOGFILE_FORMAT = "%(log_color)s%(levelname)-8s%(reset)s %(cyan)s%(filename)s:%(lineno)s:%(bold_cyan)s%(funcName)s() %(reset)s%(message)s"
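As a concrete illustration of the hash-tree constants above: splitting a SHA256 into HASH_TREE_DEPTH branches of HASH_BRANCH_SIZE characters each yields a nested storage path, mirroring the sha256_tree() helper defined in file_utils.py below:

```python
# with HASH_TREE_DEPTH = 3 and HASH_BRANCH_SIZE = 3:
sha = 'a1b2c3d4e5f6' + '0' * 52   # 64-char example hash
branch, depth = 3, 3
tree = '/'.join(sha[i:i + branch] for i in range(0, depth * branch, branch))
print(tree)  # -> a1b/2c3/d4e
```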
\ No newline at end of file diff --git a/megapixels/app/settings/types.py b/megapixels/app/settings/types.py new file mode 100644 index 00000000..0c3d7942 --- /dev/null +++ b/megapixels/app/settings/types.py @@ -0,0 +1,29 @@ +from enum import Enum + +def find_type(name, enum_type): + for enum_opt in enum_type: + if name == enum_opt.name.lower(): + return enum_opt + return None + + + +class FaceDetectNet(Enum): + """Scene text detector networks""" + HAAR, DLIB_CNN, DLIB_HOG, CVDNN = range(4) + +class CVBackend(Enum): + """OpenCV 3.4.2+ DNN target type""" + DEFAULT, HALIDE, INFER_ENGINE, OPENCV = range(4) + +class CVTarget(Enum): + """OpenCV 3.4.2+ DNN backend processor type""" + CPU, OPENCL, OPENCL_FP16, MYRIAD = range(4) + +# --------------------------------------------------------------------- +# Logger, monitoring +# -------------------------------------------------------------------- + +class LogLevel(Enum): + """Loger vebosity""" + DEBUG, INFO, WARN, ERROR, CRITICAL = range(5) diff --git a/megapixels/app/utils/__init__.py b/megapixels/app/utils/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/megapixels/app/utils/__init__.py diff --git a/megapixels/app/utils/click_utils.py b/megapixels/app/utils/click_utils.py new file mode 100644 index 00000000..dc00f58c --- /dev/null +++ b/megapixels/app/utils/click_utils.py @@ -0,0 +1,62 @@ +""" +Custom Click parameter types +""" +import click + +from app.settings import app_cfg as cfg +from app.settings import types + + +# -------------------------------------------------------- +# Click command helpers +# -------------------------------------------------------- +def enum_to_names(enum_type): + return {x.name.lower(): x for x in enum_type} + +def show_help(enum_type): + names = enum_to_names(enum_type) + return 'Options: "{}"'.format(', '.join(list(names.keys()))) + +def get_default(opt): + return opt.name.lower() + + +# -------------------------------------------------------- +# Custom Click parameter class +# -------------------------------------------------------- + + +class ParamVar(click.ParamType): + + name = 'default_type' + + def __init__(self, param_type): + # self.name = '{}'.format(param_type.name.lower()) + # sealf. 
+ self.ops = {x.name.lower(): x for x in param_type} + + def convert(self, value, param, ctx): + """converts (str) repr to Enum hash""" + try: + return self.ops[value.lower()] + except: + self.fail('{} is not a valid option'.format(value, param, ctx)) + + + + + + + + + + + + + + + + + + + diff --git a/megapixels/app/utils/file_utils.py b/megapixels/app/utils/file_utils.py new file mode 100644 index 00000000..773667b1 --- /dev/null +++ b/megapixels/app/utils/file_utils.py @@ -0,0 +1,400 @@ +""" +File utilities +""" +import sys +import os +from os.path import join +import stat + +from glob import glob +from pprint import pprint +import shutil +import distutils +import pathlib +from pathlib import Path +import json +import csv +import pickle +import threading +from queue import Queue +import time +import logging +import itertools +import collections + +import hashlib +import pymediainfo +import click +from tqdm import tqdm +import cv2 as cv +from PIL import Image +import imutils + +from app.settings import app_cfg as cfg +from app.settings import types + +log = logging.getLogger(cfg.LOGGER_NAME) + + +# ------------------------------------------ +# File I/O read/write little helpers +# ------------------------------------------ + +def glob_multi(dir_in, exts): + files = [] + for e in exts: + files.append(glob(join(dir_in, '*.{}'.format(e)))) + return files + + +def zpad(x, zeros=cfg.ZERO_PADDING): + return str(x).zfill(zeros) + +def get_ext(fpp, lower=True): + """Retuns the file extension w/o dot + :param fpp: (Pathlib.path) filepath + :param lower: (bool) force lowercase + :returns: (str) file extension (ie 'jpg') + """ + fpp = ensure_posixpath(fpp) + ext = fpp.suffix.replace('.', '') + return ext.lower() if lower else ext + + +def convert(fp_in, fp_out): + """Converts between JSON and Pickle formats + Pickle files are about 30-40% smaller filesize + """ + if get_ext(fp_in) == get_ext(fp_out): + log.error('Input: {} and output: {} are the same. Use this to convert.') + + lazywrite(lazyload(fp_in), fp_out) + + +def load_csv(fp_in, as_list=True): + """Loads CSV and retuns list of items + :param fp_in: string filepath to CSV + :returns: list of all CSV data + """ + if not Path(fp_in).exists(): + log.info('loading {}'.format(fp_in)) + log.info('loading: {}'.format(fp_in)) + with open(fp_in, 'r') as fp: + items = csv.DictReader(fp) + if as_list: + items = [x for x in items] + log.info('returning {:,} items'.format(len(items))) + return items + + +def lazywrite(data, fp_out, sort_keys=True): + """Writes JSON or Pickle data""" + ext = get_ext(fp_out) + if ext == 'json': + return write_json(data, fp_out, sort_keys=sort_keys) + elif ext == 'pkl': + return write_pickle(data, fp_out) + else: + raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext)) + + +def lazyload(fp_in, ordered=True): + """Loads JSON or Pickle serialized data""" + if not Path(fp_in).exists(): + log.error('file does not exist: {}'.format(fp_in)) + return {} + ext = get_ext(fp_in) + if ext == 'json': + items = load_json(fp_in) + elif ext == 'pkl': + items = load_pickle(fp_in) + else: + raise NotImplementedError('[!] {} is not yet supported. 
Use .pkl or .json'.format(ext)) + + if ordered: + return collections.OrderedDict(sorted(items.items(), key=lambda t: t[0])) + else: + return items + + +def load_text(fp_in): + with open(fp_in, 'rt') as fp: + lines = fp.read().rstrip('\n').split('\n') + return lines + +def load_json(fp_in): + """Loads JSON and returns items + :param fp_in: (str) filepath + :returns: data from JSON + """ + if not Path(fp_in).exists(): + log.error('file does not exist: {}'.format(fp_in)) + return {} + with open(str(fp_in), 'r') as fp: + data = json.load(fp) + return data + + +def load_pickle(fp_in): + """Loads Pickle and returns items + :param fp_in: (str) filepath + :returns: data from JSON + """ + if not Path(fp_in).exists(): + log.error('file does not exist: {}'.format(fp_in)) + return {} + with open(str(fp_in), 'rb') as fp: + data = pickle.load(fp) + return data + + +def order_items(records): + """Orders records by ASC SHA256""" + return collections.OrderedDict(sorted(records.items(), key=lambda t: t[0])) + +def write_text(data, fp_out, ensure_path=True): + if not data: + log.error('no data') + return + + if ensure_path: + mkdirs(fp_out) + with open(fp_out, 'w') as fp: + if type(data) == list: + fp.write('\n'.join(data)) + else: + fp.write(data) + + +def write_pickle(data, fp_out, ensure_path=True): + """ + """ + if ensure_path: + mkdirs(fp_out) # mkdir + with open(fp_out, 'wb') as fp: + pickle.dump(data, fp) + + +def write_json(data, fp_out, minify=True, ensure_path=True, sort_keys=True): + """ + """ + if ensure_path: + mkdirs(fp_out) + with open(fp_out, 'w') as fp: + if minify: + json.dump(data, fp, separators=(',',':'), sort_keys=sort_keys) + else: + json.dump(data, fp, indent=2, sort_keys=sort_keys) + +def write_csv(data, fp_out, header=None): + """ """ + with open(fp_out, 'w') as fp: + writer = csv.DictWriter(fp, fieldnames=header) + writer.writeheader() + if type(data) is dict: + for k, v in data.items(): + fp.writerow('{},{}'.format(k, v)) + + +def write_serialized_items(items, fp_out, ensure_path=True, minify=True, sort_keys=True): + """Writes serialized data + :param items: (dict) a sha256 dict of MappingItems + :param serialize: (bool) serialize the data + :param ensure_path: ensure the parent directories exist + :param minify: reduces JSON file size + """ + log.info('Writing serialized data...') + fpp_out = ensure_posixpath(fp_out) + serialized_items = {k: v.serialize() for k, v in tqdm(items.items()) } + # write data + ext = get_ext(fpp_out) + if ext == 'json': + write_json(serialized_items, fp_out, ensure_path=ensure_path, minify=minify, sort_keys=sort_keys) + elif ext == 'pkl': + write_pickle(serialized_items, fp_out) + else: + raise NotImplementedError('[!] {} is not yet supported. Use .pkl or .json'.format(ext)) + log.info('Wrote {:,} items to {}'.format(len(items), fp_out)) + + +def write_modeled_data(data, fp_out, ensure_path=False): + """ + """ + fpp_out = ensure_posixpath(fp_out) + if ensure_path: + mkdirs(fpp_out) + ext = get_ext(fpp_out) + if ext == 'pkl': + write_pickle(data, str(fp_out)) + else: + raise NotImplementedError('[!] {} is not yet supported. 
Use .pkl or .json'.format(ext)) + + +# --------------------------------------------------------------------- +# Filepath utilities +# --------------------------------------------------------------------- + +def ensure_posixpath(fp): + """Ensures filepath is pathlib.Path + :param fp: a (str, LazyFile, PosixPath) + :returns: a PosixPath filepath object + """ + if type(fp) == str: + fpp = Path(fp) + elif type(fp) == click.utils.LazyFile: + fpp = Path(fp.name) + elif type(fp) == pathlib.PosixPath: + fpp = fp + else: + raise TypeError('{} is not a valid filepath type'.format(type(fp))) + return fpp + + +def mkdirs(fp): + """Ensure parent directories exist for a filepath + :param fp: string, Path, or click.File + """ + fpp = ensure_posixpath(fp) + fpp = fpp.parent if fpp.suffix else fpp + fpp.mkdir(parents=True, exist_ok=True) + + +def ext_media_format(ext): + """Converts file extension into Enum MediaType + param ext: str of file extension" + """ + for media_format, exts in cfg.VALID_MEDIA_EXTS.items(): + if ext in exts: + return media_format + raise ValueError('{} is not a valid option'.format(ext)) + + +def sha256(fp_in, block_size=65536): + """Generates SHA256 hash for a file + :param fp_in: (str) filepath + :param block_size: (int) byte size of block + :returns: (str) hash + """ + sha256 = hashlib.sha256() + with open(fp_in, 'rb') as fp: + for block in iter(lambda: f.read(block_size), b''): + sha256.update(block) + return sha256.hexdigest() + + +def sha256_tree(sha256): + """Split hash into branches with tree-depth for faster file indexing + :param sha256: str of a sha256 hash + :returns: str with sha256 tree with '/' delimeter + """ + branch_size = cfg.HASH_BRANCH_SIZE + tree_size = cfg.HASH_TREE_DEPTH * branch_size + sha256_tree = [sha256[i:(i+branch_size)] for i in range(0, tree_size, branch_size)] + return '/'.join(sha256_tree) + + +def migrate(fmaps, threads=1, action='copy', force=False): + """Copy/move/symlink files form src to dst directory + :param fmaps: (dict) with 'src' and 'dst' filepaths + :param threads: (int) number of threads + :param action: (str) copy/move/symlink + :param force: (bool) force overwrite existing files + """ + log = log + num_items = len(fmaps) + + def copytree(src, dst, symlinks = False, ignore = None): + # ozxyqk: https://stackoverflow.com/questions/22588225/how-do-you-merge-two-directories-or-move-with-replace-from-the-windows-command + if not os.path.exists(dst): + mkdirs(dst) + # os.makedirs(dst) + shutil.copystat(src, dst) + lst = os.listdir(src) + if ignore: + excl = ignore(src, lst) + lst = [x for x in lst if x not in excl] + for item in lst: + s = os.path.join(src, item) + d = os.path.join(dst, item) + if symlinks and os.path.islink(s): + if os.path.exists(d): + os.remove(d) + os.symlink(os.readlink(s), d) + try: + st = os.lstat(s) + mode = stat.S_IMODE(st.st_mode) + os.lchmod(d, mode) + except: + pass # lchmod not available + elif os.path.isdir(s): + copytree(s, d, symlinks, ignore) + else: + shutil.copy(s, d) + + assert(action in ['copy','move','symlink']) + + if threads > 1: + # threaded + task_queue = Queue() + print_lock = threading.Lock() + + def migrate_action(fmap): + data_local = threading.local() + data_local.src, data_local.dst = (fmap['src'], fmap['dst']) + data_local.src_path = Path(data_local.src) + data_local.dst_path = Path(data_local.dst) + + if force or not data_local.dst_path.exists(): + if action == 'copy': + shutil.copy(data_local.src, data_local.dst) + #if data_local.src_path.is_dir(): + # copytree(data_local.src, 
data_local.dst) + #else: + elif action == 'move': + shutil.move(data_local.src, data_local.dst) + elif action == 'symlink': + if force: + data_local.dst_path.unlink() + Path(data_local.src).symlink_to(data_local.dst) + + def process_queue(num_items): + # TODO: progress bar + while True: + fmap = task_queue.get() + migrate_action(fmap) + log.info('migrate: {:.2f} {:,}/{:,}'.format( + (task_queue.qsize() / num_items)*100, task_queue.qsize(), num_items)) + task_queue.task_done() + + # avoid race conditions by creating dir structure here + log.info('create directory structure') + for fmap in tqdm(fmaps): + mkdirs(fmap['dst']) + + # init threads + for i in range(threads): + t = threading.Thread(target=process_queue, args=(num_items,)) + t.daemon = True + t.start() + + # process threads + start = time.time() + for fmap in fmaps: + task_queue.put(fmap) + + task_queue.join() + + else: + # non-threaded + for fmap in tqdm(fmaps): + mkdirs(fmap['dst']) + if action == 'copy': + shutil.copy(fmap['src'], fmap['dst']) + elif action == 'move': + shutil.move(fmap['src'], fmap['dst']) + elif action == 'symlink': + if force: + Path(fmap['dst'].unlink()) + Path(fp_src).symlink_to(fp_dst) + return + diff --git a/megapixels/app/utils/im_utils.py b/megapixels/app/utils/im_utils.py new file mode 100644 index 00000000..a0f23cd2 --- /dev/null +++ b/megapixels/app/utils/im_utils.py @@ -0,0 +1,506 @@ +import sys +import os +from os.path import join +import cv2 as cv +import imagehash +from PIL import Image, ImageDraw, ImageFilter, ImageOps +from skimage.filters.rank import entropy +from skimage.morphology import disk +from skimage import feature +# import matplotlib.pyplot as plt +import imutils +import time +import numpy as np +import torch +import torch.nn as nn +import torchvision.models as models +import torchvision.transforms as transforms +from torch.autograd import Variable +from sklearn.metrics.pairwise import cosine_similarity +import datetime + + + + +def compute_features(fe,frames,phashes,phash_thresh=1): + """ + Get vector embedding using FeatureExtractor + :param fe: FeatureExtractor class + :param frames: list of frame images as numpy.ndarray + :param phash_thresh: perceptual hash threshold + :returns: list of feature vectors + """ + vals = [] + phash_pre = phashes[0] + for i,im in enumerate(frames): + if i == 0 or (phashes[i] - phashes[i-1]) > phash_thresh: + vals.append(fe.extract(im)) + else: + vals.append(vals[i-1]) + return vals + + +def ensure_pil(im, bgr2rgb=False): + """Ensure image is Pillow format + :param im: image in numpy or PIL.Image format + :returns: image in Pillow RGB format + """ + try: + im.verify() + return im + except: + if bgr2rgb: + im = cv.cvtColor(im,cv.COLOR_BGR2RGB) + return Image.fromarray(im.astype('uint8'), 'RGB') + +def ensure_np(im): + """Ensure image is Numpy.ndarry format + :param im: image in numpy or PIL.Image format + :returns: image in Numpy uint8 format + """ + if type(im) == np.ndarray: + return im + return np.asarray(im, np.uint8) + + +def resize(im,width=0,height=0): + """resize image using imutils. 
Use w/h=[0 || None] to prioritize other edge size + :param im: a Numpy.ndarray image + :param wh: a tuple of (width, height) + """ + w = width + h = height + if w is 0 and h is 0: + return im + elif w > 0 and h > 0: + return imutils.resize(im,width=w,height=h) + elif w > 0 and h is 0: + return imutils.resize(im,width=w) + elif w is 0 and h > 0: + return imutils.resize(im,height=h) + else: + return im + +def filter_pixellate(im,num_cells): + """Pixellate image by downsample then upsample + :param im: PIL.Image + :returns: PIL.Image + """ + w,h = im.size + im = im.resize((num_cells,num_cells), Image.NEAREST) + im = im.resize((w,h), Image.NEAREST) + return im + +# Plot images inline using Matplotlib +# def pltimg(im,title=None,mode='rgb',figsize=(8,12),dpi=160,output=None): +# plt.figure(figsize=figsize) +# plt.xticks([]),plt.yticks([]) +# if title is not None: +# plt.title(title) +# if mode.lower() == 'bgr': +# im = cv.cvtColor(im,cv.COLOR_BGR2RGB) + +# f = plt.gcf() +# if mode.lower() =='grey' or mode.lower() == 'gray': +# plt.imshow(im,cmap='gray') +# else: +# plt.imshow(im) +# plt.show() +# plt.draw() +# if output is not None: +# bbox_inches='tight' +# ext=osp.splitext(output)[1].replace('.','') +# f.savefig(output,dpi=dpi,format=ext) +# print('Image saved to: {}'.format(output)) + + + +# Utilities for analyzing frames + +def compute_gray(im): + im = cv.cvtColor(im,cv.COLOR_BGR2GRAY) + n_vals = float(im.shape[0] * im.shape[1]) + avg = np.sum(im[:]) / n_vals + return avg + +def compute_rgb(im): + im = cv.cvtColor(im,cv.COLOR_BGR2RGB) + n_vals = float(im.shape[0] * im.shape[1]) + avg_r = np.sum(im[:,:,0]) / n_vals + avg_g = np.sum(im[:,:,1]) / n_vals + avg_b = np.sum(im[:,:,2]) / n_vals + avg_rgb = np.sum(im[:,:,:]) / (n_vals * 3.0) + return avg_r, avg_b, avg_g, avg_rgb + +def compute_hsv(im): + im = cv.cvtColor(im,cv.COLOR_BGR2HSV) + n_vals = float(im.shape[0] * im.shape[1]) + avg_h = np.sum(frame[:,:,0]) / n_vals + avg_s = np.sum(frame[:,:,1]) / n_vals + avg_v = np.sum(frame[:,:,2]) / n_vals + avg_hsv = np.sum(frame[:,:,:]) / (n_vals * 3.0) + return avg_h, avg_s, avg_v, avg_hsv + +def pys_dhash(im, hashSize=8): + # resize the input image, adding a single column (width) so we + # can compute the horizontal gradient + resized = cv.resize(im, (hashSize + 1, hashSize)) + # compute the (relative) horizontal gradient between adjacent + # column pixels + diff = resized[:, 1:] > resized[:, :-1] + # convert the difference image to a hash + return sum([2 ** i for (i, v) in enumerate(diff.flatten()) if v]) + + +############################################ +# ImageHash +# pip install imagehash +############################################ + + +def compute_ahash(im): + """Compute average hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.average_hash(ensure_pil(im_pil)) + +def compute_phash(im): + """Compute perceptual hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.phash(ensure_pil(im)) + +def compute_dhash(im): + """Compute difference hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.dhash(ensure_pil(im)) + +def compute_whash(im): + """Compute wavelet hash using ImageHash library + :param im: Numpy.ndarray + :returns: Imagehash.ImageHash + """ + return imagehash.whash(ensure_pil(im)) + +def compute_whash_b64(im): + """Compute wavelest hash base64 using ImageHash library + :param im: Numpy.ndarray + :returns: 
Imagehash.ImageHash + """ + return lambda im: imagehash.whash(ensure_pil(im), mode='db4') + + +############################################ +# Pillow +############################################ + +def sharpen(im): + """Sharpen image using PIL.ImageFilter + param: im: PIL.Image + returns: PIL.Image + """ + im = ensure_pil(im) + im.filter(ImageFilter.SHARPEN) + return ensure_np(im) + +def fit_image(im,targ_size): + """Force fit image by cropping + param: im: PIL.Image + param: targ_size: a tuple of target (width, height) + returns: PIL.Image + """ + im_pil = ensure_pil(im) + frame_pil = ImageOps.fit(im_pil, targ_size, + method=Image.BICUBIC, centering=(0.5, 0.5)) + return ensure_np(frame_pil) + + +def compute_entropy(im): + entr_img = entropy(im, disk(10)) + + +############################################ +# scikit-learn +############################################ + +def compute_entropy(im): + # im is grayscale numpy + return entropy(im, disk(10)) + +############################################ +# OpenCV +############################################ + +def bgr2gray(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (BGR) + :returns: Numpy.ndarray (Gray) + """ + return cv.cvtColor(im,cv.COLOR_BGR2GRAY) + +def gray2bgr(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (Gray) + :returns: Numpy.ndarray (BGR) + """ + return cv.cvtColor(im,cv.COLOR_GRAY2BGR) + +def bgr2rgb(im): + """Wrapper for cv2.cvtColor transform + :param im: Numpy.ndarray (BGR) + :returns: Numpy.ndarray (RGB) + """ + return cv.cvtColor(im,cv.COLOR_BGR2RGB) + +def compute_laplacian(im): + # below 100 is usually blurry + return cv.Laplacian(im, cv.CV_64F).var() + + +# http://radjkarl.github.io/imgProcessor/index.html# + +def modifiedLaplacian(img): + ''''LAPM' algorithm (Nayar89)''' + M = np.array([-1, 2, -1]) + G = cv.getGaussianKernel(ksize=3, sigma=-1) + Lx = cv.sepFilter2D(src=img, ddepth=cv.CV_64F, kernelX=M, kernelY=G) + Ly = cv.sepFilter2D(src=img, ddepth=cv.CV_64F, kernelX=G, kernelY=M) + FM = np.abs(Lx) + np.abs(Ly) + return cv.mean(FM)[0] + +def varianceOfLaplacian(img): + ''''LAPV' algorithm (Pech2000)''' + lap = cv.Laplacian(img, ddepth=-1)#cv.cv.CV_64F) + stdev = cv.meanStdDev(lap)[1] + s = stdev[0]**2 + return s[0] + +def tenengrad(img, ksize=3): + ''''TENG' algorithm (Krotkov86)''' + Gx = cv.Sobel(img, ddepth=cv.CV_64F, dx=1, dy=0, ksize=ksize) + Gy = cv.Sobel(img, ddepth=cv.CV_64F, dx=0, dy=1, ksize=ksize) + FM = Gx**2 + Gy**2 + return cv.mean(FM)[0] + +def normalizedGraylevelVariance(img): + ''''GLVN' algorithm (Santos97)''' + mean, stdev = cv.meanStdDev(img) + s = stdev[0]**2 / mean[0] + return s[0] + +def compute_if_blank(im,width=100,sigma=0,thresh_canny=.1,thresh_mean=4,mask=None): + # im is graysacale np + #im = imutils.resize(im,width=width) + #mask = imutils.resize(mask,width=width) + if mask is not None: + im_canny = feature.canny(im,sigma=sigma,mask=mask) + total = len(np.where(mask > 0)[0]) + else: + im_canny = feature.canny(im,sigma=sigma) + total = (im.shape[0]*im.shape[1]) + n_white = len(np.where(im_canny > 0)[0]) + per = n_white/total + if np.mean(im) < thresh_mean or per < thresh_canny: + return 1 + else: + return 0 + + +def print_timing(t,n): + t = time.time()-t + print('Elapsed time: {:.2f}'.format(t)) + print('FPS: {:.2f}'.format(n/t)) + +def vid2frames(fpath, limit=5000, width=None, idxs=None): + """Convert a video file into list of frames + :param fpath: filepath to the video file + :param limit: maximum number of frames to read + :param fpath: 
the indices of frames to keep (rest are skipped) + :returns: (fps, number of frames, list of Numpy.ndarray frames) + """ + frames = [] + try: + cap = cv.VideoCapture(fpath) + except: + print('[-] Error. Could not read video file: {}'.format(fpath)) + try: + cap.release() + except: + pass + return frames + + fps = cap.get(cv.CAP_PROP_FPS) + nframes = int(cap.get(cv.CAP_PROP_FRAME_COUNT)) + + if idxs is not None: + # read sample indices by seeking to frame index + for idx in idxs: + cap.set(cv.CAP_PROP_POS_FRAMES, idx) + res, frame = cap.read() + if width is not None: + frame = imutils.resize(frame, width=width) + frames.append(frame) + else: + while(True and len(frames) < limit): + res, frame = cap.read() + if not res: + break + if width is not None: + frame = imutils.resize(frame, width=width) + frames.append(frame) + + cap.release() + del cap + #return fps,nframes,frames + return frames + +def convolve_filter(vals,filters=[1]): + for k in filters: + vals_tmp = np.zeros_like(vals) + t = len(vals_tmp) + for i,v in enumerate(vals): + sum_vals = vals[max(0,i-k):min(t-1,i+k)] + vals_tmp[i] = np.mean(sum_vals) + vals = vals_tmp.copy() + return vals + +def cosine_delta(v1,v2): + return 1.0 - cosine_similarity(v1.reshape((1, -1)), v2.reshape((1, -1)))[0][0] + + + +def compute_edges(vals): + # find edges (1 = rising, -1 = falling) + edges = np.zeros_like(vals) + for i in range(len(vals[1:])): + delta = vals[i] - vals[i-1] + if delta == -1: + edges[i] = 1 # rising edge 0 --> 1 + elif delta == 1: + edges[i+1] = 2 # falling edge 1 --> 0 + # get index for rise fall + rising = np.where(np.array(edges) == 1)[0] + falling = np.where(np.array(edges) == 2)[0] + return rising, falling + + +############################################ +# Point, Rect +############################################ + +class Point(object): + def __init__(self, x, y): + self.x = x + self.y = y + +class Rect(object): + def __init__(self, p1, p2): + '''Store the top, bottom, left and right values for points + p1 and p2 are the (corners) in either order + ''' + self.left = min(p1.x, p2.x) + self.right = max(p1.x, p2.x) + self.top = min(p1.y, p2.y) + self.bottom = max(p1.y, p2.y) + +def overlap(r1, r2): + '''Overlapping rectangles overlap both horizontally & vertically + ''' + return range_overlap(r1.left, r1.right, r2.left, r2.right) and \ + range_overlap(r1.top, r1.bottom, r2.top, r2.bottom) + +def range_overlap(a_min, a_max, b_min, b_max): + '''Neither range is completely greater than the other + ''' + return (a_min <= b_max) and (b_min <= a_max) + +def merge_rects(r1,r2): + p1 = Point(min(r1.left,r2.left),min(r1.top,r2.top)) + p2 = Point(max(r1.right,r2.right),max(r1.bottom,r2.bottom)) + return Rect(p1,p2) + +def is_overlapping(r1,r2): + """r1,r2 as [x1,y1,x2,y2] list""" + r1x = Rect(Point(r1[0],r1[1]),Point(r1[2],r1[3])) + r2x = Rect(Point(r2[0],r2[1]),Point(r2[2],r2[3])) + return overlap(r1x,r2x) + +def get_rects_merged(rects,bounds,expand=0): + """rects: list of points in [x1,y1,x2,y2] format""" + rects_expanded = [] + bx,by = bounds + # expand + for x1,y1,x2,y2 in rects: + x1 = max(0,x1-expand) + y1 = max(0,y1-expand) + x2 = min(bx,x2+expand) + y2 = min(by,y2+expand) + rects_expanded.append(Rect(Point(x1,y1),Point(x2,y2))) + + #rects_expanded = [Rect(Point(x1,y1),Point(x2,y2)) for x1,y1,x2,y2 in rects_expanded] + rects_merged = [] + for i,r in enumerate(rects_expanded): + found = False + for j,rm in enumerate(rects_merged): + if overlap(r,rm): + rects_merged[j] = merge_rects(r,rm) #expand + found = True + if not found: + 
rects_merged.append(r) + # convert back to [x1,y1,x2,y2] format + rects_merged = [(r.left,r.top,r.right,r.bottom) for r in rects_merged] + # contract + rects_contracted = [] + for x1,y1,x2,y2 in rects_merged: + x1 = min(bx,x1+expand) + y1 = min(by,y1+expand) + x2 = max(0,x2-expand) + y2 = max(0,y2-expand) + rects_contracted.append((x1,y1,x2,y2)) + + return rects_contracted + + +############################################ +# Image display +############################################ + + +def montage(frames,ncols=4,nrows=None,width=None): + """Convert list of frames into a grid montage + param: frames: list of frames as Numpy.ndarray + param: ncols: number of columns + param: width: resize images to this width before adding to grid + returns: Numpy.ndarray grid of all images + """ + + # expand image size if not enough frames + if nrows is not None and len(frames) < ncols * nrows: + blank = np.zeros_like(frames[0]) + n = ncols * nrows - len(frames) + for i in range(n): frames.append(blank) + + rows = [] + for i,im in enumerate(frames): + if width is not None: + im = imutils.resize(im,width=width) + h,w = im.shape[:2] + if i % ncols == 0: + if i > 0: + rows.append(ims) + ims = [] + ims.append(im) + if len(ims) > 0: + for j in range(ncols-len(ims)): + ims.append(np.zeros_like(im)) + rows.append(ims) + row_ims = [] + for row in rows: + row_im = np.hstack(np.array(row)) + row_ims.append(row_im) + contact_sheet = np.vstack(np.array(row_ims)) + return contact_sheet diff --git a/megapixels/app/utils/logger_utils.py b/megapixels/app/utils/logger_utils.py new file mode 100644 index 00000000..d4f962eb --- /dev/null +++ b/megapixels/app/utils/logger_utils.py @@ -0,0 +1,68 @@ +""" +Logger instantiator for use with Click utlity scripts +""" +import sys +import os +import logging + +import colorlog + +from app.settings import app_cfg as cfg + + +class Logger: + + logger_name = 'app' + + def __init__(self): + pass + + @staticmethod + def create(verbosity=4, logfile=None): + """Configures a logger from click params + :param verbosity: (int) between 0 and 5 + :param logfile: (str) path to logfile + :returns: logging root object + """ + + loglevel = (5 - (max(0, min(verbosity, 5)))) * 10 # where logging.DEBUG = 10 + date_format = '%Y-%m-%d %H:%M:%S' + if 'colorlog' in sys.modules and os.isatty(2): + cformat = '%(log_color)s' + cfg.LOGFILE_FORMAT + f = colorlog.ColoredFormatter(cformat, date_format, + log_colors = { 'DEBUG' : 'yellow', 'INFO' : 'white', + 'WARNING' : 'bold_yellow', 'ERROR': 'bold_red', + 'CRITICAL': 'bold_red' }) + else: + f = logging.Formatter(cfg.LOGFILE_FORMAT, date_format) + + # logger = logging.getLogger(Logger.logger_name) + logger = logging.getLogger(cfg.LOGGER_NAME) + logger.setLevel(loglevel) + + if logfile: + # create file handler which logs even debug messages + fh = logging.FileHandler(logfile) + fh.setLevel(loglevel) + logger.addHandler(fh) + + # add colored handler + ch = logging.StreamHandler() + ch.setFormatter(f) + logger.addHandler(ch) + + if verbosity == 0: + logger.disabled = True + + # test + # logger.debug('Hello Debug') + # logger.info('Hello Info') + # logger.warn('Hello Warn') + # logger.error('Hello Error') + # logger.critical('Hello Critical') + + return logger + + @staticmethod + def getLogger(): + return logging.getLogger(cfg.LOGGER_NAME)
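The intended call pattern for the logger, per the code above: an entrypoint configures once with Logger.create(), and every other module retrieves the same 'app' logger via Logger.getLogger(). A minimal sketch:

```python
from app.utils.logger_utils import Logger

# configure once at startup; verbosity=4 maps to DEBUG, verbosity=0 disables output
Logger.create(verbosity=4)

# elsewhere, fetch the already-configured logger without reconfiguring
log = Logger.getLogger()
log.debug('pipeline start')
```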
\ No newline at end of file diff --git a/megapixels/cli_admin.py b/megapixels/cli_admin.py new file mode 100644 index 00000000..45ebeed4 --- /dev/null +++ b/megapixels/cli_admin.py @@ -0,0 +1,36 @@ +# -------------------------------------------------------- +# This is the vframe administrative script for utility +# add/edit commands in vframe/admin/commands directory +# -------------------------------------------------------- +import click + +from app.settings import app_cfg as cfg +from app.utils import logger_utils +from app.models.click_factory import ClickSimple + +# click cli factory +cc = ClickSimple.create(cfg.DIR_COMMANDS_PROCESSOR_ADMIN) + +# -------------------------------------------------------- +# CLI +# -------------------------------------------------------- +@click.group(cls=cc, chain=False) +@click.option('-v', '--verbose', 'verbosity', count=True, default=4, + show_default=True, + help='Verbosity: -v DEBUG, -vv INFO, -vvv WARN, -vvvv ERROR, -vvvvv CRITICAL') +@click.pass_context +def cli(ctx, **kwargs): + """\033[1m\033[94mMegaPixels: Admin/Utility Scripts\033[0m + """ + ctx.opts = {} + # init logger + logger_utils.Logger.create(verbosity=kwargs['verbosity']) + + + +# -------------------------------------------------------- +# Entrypoint +# -------------------------------------------------------- +if __name__ == '__main__': + cli() + diff --git a/megapixels/cli_datasets.py b/megapixels/cli_datasets.py new file mode 100644 index 00000000..ae484e80 --- /dev/null +++ b/megapixels/cli_datasets.py @@ -0,0 +1,36 @@ +# -------------------------------------------------------- +# This is the vframe administrative script for utility +# add/edit commands in vframe/admin/commands directory +# -------------------------------------------------------- +import click + +from app.settings import app_cfg as cfg +from app.utils import logger_utils +from app.models.click_factory import ClickSimple + +# click cli factory +cc = ClickSimple.create(cfg.DIR_COMMANDS_PROCESSOR_DATASETS) + +# -------------------------------------------------------- +# CLI +# -------------------------------------------------------- +@click.group(cls=cc, chain=False) +@click.option('-v', '--verbose', 'verbosity', count=True, default=4, + show_default=True, + help='Verbosity: -v DEBUG, -vv INFO, -vvv WARN, -vvvv ERROR, -vvvvv CRITICAL') +@click.pass_context +def cli(ctx, **kwargs): + """\033[1m\033[94mMegaPixels: Admin/Utility Scripts\033[0m + """ + ctx.opts = {} + # init logger + logger_utils.Logger.create(verbosity=kwargs['verbosity']) + + + +# -------------------------------------------------------- +# Entrypoint +# -------------------------------------------------------- +if __name__ == '__main__': + cli() + diff --git a/megapixels/datasets/commands/crop.py b/megapixels/datasets/commands/crop.py new file mode 100644 index 00000000..778be0c4 --- /dev/null +++ b/megapixels/datasets/commands/crop.py @@ -0,0 +1,104 @@ +""" +Crop images to prepare for training +""" + +import click +from PIL import Image, ImageOps, ImageFilter, ImageDraw + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg + +@click.command() +@click.option('-i', '--input', 'opt_dir_in', required=True, + help='Input directory') +@click.option('-o', '--output', 'opt_dir_out', required=True, + help='Output directory') +@click.option('-e', '--ext', 'opt_ext', + default='jpg', type=click.Choice(['jpg', 'png']), + help='File glob ext') +@click.option('--size', 'opt_size', + type=(int, int), 
diff --git a/megapixels/datasets/commands/crop.py b/megapixels/datasets/commands/crop.py new file mode 100644 index 00000000..778be0c4 --- /dev/null +++ b/megapixels/datasets/commands/crop.py @@ -0,0 +1,104 @@
"""
Crop images to prepare for training
"""

import click
from PIL import Image, ImageOps, ImageFilter, ImageDraw

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

@click.command()
@click.option('-i', '--input', 'opt_dir_in', required=True,
  help='Input directory')
@click.option('-o', '--output', 'opt_dir_out', required=True,
  help='Output directory')
@click.option('-e', '--ext', 'opt_ext',
  default='jpg', type=click.Choice(['jpg', 'png']),
  help='File glob ext')
@click.option('--size', 'opt_size',
  type=(int, int), default=(256, 256),
  help='Output image size')
@click.option('-t', '--crop-type', 'opt_crop_type',
  default='center', type=click.Choice(['center', 'mirror', 'face', 'person', 'none']),
  help='Crop anchor type')
@click.pass_context
def cli(ctx, opt_dir_in, opt_dir_out, opt_ext, opt_size, opt_crop_type):
  """Crop and mirror images"""

  import os
  from os.path import join
  from pathlib import Path
  from glob import glob
  from tqdm import tqdm

  from app.utils import logger_utils, file_utils, im_utils

  # -------------------------------------------------
  # process here

  log = logger_utils.Logger.getLogger()
  log.info('crop images')

  # get list of files to process
  fp_ims = glob(join(opt_dir_in, '*.{}'.format(opt_ext)))
  log.debug('files: {}'.format(len(fp_ims)))

  # ensure output dir exists
  file_utils.mkdirs(opt_dir_out)

  for fp_im in tqdm(fp_ims):
    im = process_crop(fp_im, opt_size, opt_crop_type)
    fp_out = join(opt_dir_out, Path(fp_im).name)
    im.save(fp_out)


def process_crop(fp_im, opt_size, crop_type):
  # 'face' and 'person' are not yet implemented; 'none' returns the image unchanged
  im = Image.open(fp_im)
  if crop_type == 'center':
    im = crop_square_fit(im, opt_size)
  elif crop_type == 'mirror':
    im = mirror_crop_square(im, opt_size)
  return im

def crop_square_fit(im, size, center=(0.5, 0.5)):
  return ImageOps.fit(im, size, method=Image.BICUBIC, centering=center)

def mirror_crop_square(im, size):
  # force to even dims
  if im.size[0] % 2 or im.size[1] % 2:
    im = ImageOps.fit(im, ((im.size[0] // 2) * 2, (im.size[1] // 2) * 2))

  # create new square image
  min_size, max_size = (min(im.size), max(im.size))
  orig_w, orig_h = im.size
  margin = (max_size - min_size) // 2
  w, h = (max_size, max_size)
  im_new = Image.new('RGB', (w, h), color=(0, 0, 0))

  # PIL crop/paste boxes are (left, upper, right, lower)
  if orig_w > orig_h:
    # landscape: mirror-expand top/bottom with vertically flipped edge strips
    im_top = ImageOps.flip(im.crop((0, 0, w, margin)))
    im_bot = ImageOps.flip(im.crop((0, orig_h - margin, w, orig_h)))
    im_new.paste(im_top, (0, 0))
    im_new.paste(im, (0, margin))
    im_new.paste(im_bot, (0, h - margin))
  elif orig_h > orig_w:
    # portrait: mirror-expand left/right with horizontally mirrored edge strips
    im_left = ImageOps.mirror(im.crop((0, 0, margin, h)))
    im_right = ImageOps.mirror(im.crop((orig_w - margin, 0, orig_w, h)))
    im_new.paste(im_left, (0, 0))
    im_new.paste(im, (margin, 0))
    im_new.paste(im_right, (w - margin, 0))

  return im_new.resize(size)


def center_crop_face():
  pass

def center_crop_person():
  pass
\ No newline at end of file
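To sanity-check the mirror-expand geometry above, here is a self-contained sketch (pure Pillow, no repo imports) that squares a landscape image the same way: reflect the top and bottom strips rather than stretching or letterboxing.

```python
from PIL import Image, ImageOps

def mirror_square(im):
    """Square a landscape image by mirroring its top/bottom edge strips."""
    w, h = im.size
    assert w > h and (w - h) % 2 == 0, 'demo handles even-margin landscape only'
    margin = (w - h) // 2
    out = Image.new('RGB', (w, w))
    out.paste(ImageOps.flip(im.crop((0, 0, w, margin))), (0, 0))                  # reflected top strip
    out.paste(im, (0, margin))                                                    # original image
    out.paste(ImageOps.flip(im.crop((0, h - margin, w, h))), (0, w - margin))     # reflected bottom strip
    return out

im = Image.new('RGB', (100, 60), color=(30, 120, 200))
print(mirror_square(im).size)  # (100, 100)
```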
diff --git a/megapixels/datasets/commands/extract.py b/megapixels/datasets/commands/extract.py new file mode 100644 index 00000000..4e77a978 --- /dev/null +++ b/megapixels/datasets/commands/extract.py @@ -0,0 +1,86 @@
"""
Extract ROI crops from images to prepare for training
"""

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
  help='Input CSV')
@click.option('--media', 'opt_dir_media', required=True,
  help='Input image/video directory')
@click.option('-o', '--output', 'opt_dir_out', required=True,
  help='Output directory for extracted ROI images')
@click.option('--size', 'opt_size',
  type=(int, int), default=(300, 300),
  help='Output image size')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
  help='Slice list of files')
@click.option('--padding', 'opt_padding', default=0,
  help='Facial padding')
@click.option('--ext', 'opt_ext_out', default='jpg', type=click.Choice(['jpg', 'png']),
  help='Output image type')
@click.pass_context
def cli(ctx, opt_fp_in, opt_dir_media, opt_dir_out, opt_size, opt_slice,
  opt_padding, opt_ext_out):
  """Extract ROIs to images"""

  import os
  from os.path import join
  from pathlib import Path
  from glob import glob

  from tqdm import tqdm
  import numpy as np
  from PIL import Image, ImageOps, ImageFilter, ImageDraw
  import cv2 as cv
  import pandas as pd

  from app.utils import logger_utils, file_utils, im_utils
  from app.models.bbox import BBox
  # -------------------------------------------------
  # process here
  log = logger_utils.Logger.getLogger()

  df_rois = pd.read_csv(opt_fp_in)
  # slicing with the default (None, None) is a no-op
  df_rois = df_rois[opt_slice[0]:opt_slice[1]]

  log.info('Processing {:,} rows'.format(len(df_rois)))

  file_utils.mkdirs(opt_dir_out)

  df_rois_grouped = df_rois.groupby(['fn'])  # group by fn/filename
  groups = df_rois_grouped.groups

  for group in groups:

    # open the source image once per file
    group_rows = df_rois_grouped.get_group(group)

    row = group_rows.iloc[0]
    fp_im = join(opt_dir_media, '{fn}{ext}'.format(**row))  # TODO change to ext
    im = Image.open(fp_im)

    for idx, roi in group_rows.iterrows():
      log.info('{}'.format(roi['fn']))
      # scale normalized bbox to image dimensions
      xywh = [roi['x'], roi['y'], roi['w'], roi['h']]
      bbox = BBox.from_xywh(*xywh)
      dim = im.size
      bbox_dim = bbox.to_dim(dim)
      # expand
      bbox_dim_exp = bbox_dim.expand_dim(opt_padding, dim)
      # crop
      x1y1x2y2 = bbox_dim_exp.pt_tl + bbox_dim_exp.pt_br
      im_crop = im.crop(box=x1y1x2y2)
      # save
      idx_zpad = file_utils.zpad(idx, zeros=3)
      fp_im_out = join(opt_dir_out, '{}_{}.{}'.format(roi['fn'], idx_zpad, opt_ext_out))
      im_crop.save(fp_im_out)
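The `BBox` helpers above are internal to this repo; for reference, a dependency-free sketch of the same normalized-xywh to padded pixel-box conversion (column names `x,y,w,h` as in the CSV; treating padding as pixels is an assumption):

```python
def roi_to_crop_box(x, y, w, h, im_w, im_h, padding=0):
    """Normalized top-left (x, y, w, h) ROI -> clamped pixel (x1, y1, x2, y2)."""
    x1 = int(x * im_w) - padding
    y1 = int(y * im_h) - padding
    x2 = int((x + w) * im_w) + padding
    y2 = int((y + h) * im_h) + padding
    # clamp to the image bounds
    return (max(0, x1), max(0, y1), min(im_w, x2), min(im_h, y2))

print(roi_to_crop_box(0.25, 0.25, 0.5, 0.5, 640, 480, padding=10))
# (150, 110, 490, 370)
```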
diff --git a/megapixels/datasets/commands/face.py b/megapixels/datasets/commands/face.py new file mode 100644 index 00000000..6b7b18b7 --- /dev/null +++ b/megapixels/datasets/commands/face.py @@ -0,0 +1,117 @@
"""
Detect faces in images and export the ROIs to CSV
"""

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

@click.command()
@click.option('-i', '--input', 'opt_dir_in', required=True,
  help='Input directory')
@click.option('-o', '--output', 'opt_fp_out', required=True,
  help='Output CSV')
@click.option('-e', '--ext', 'opt_ext',
  default='jpg', type=click.Choice(['jpg', 'png']),
  help='File glob ext')
@click.option('--size', 'opt_size',
  type=(int, int), default=(300, 300),
  help='Output image size')
@click.option('-t', '--detector-type', 'opt_detector_type',
  type=cfg.FaceDetectNetVar,
  default=click_utils.get_default(types.FaceDetectNet.DLIB_CNN),
  help=click_utils.show_help(types.FaceDetectNet))
@click.option('-g', '--gpu', 'opt_gpu', default=0,
  help='GPU index')
@click.option('--conf', 'opt_conf_thresh', default=0.85, type=click.FloatRange(0, 1),
  help='Confidence minimum threshold')
@click.option('--pyramids', 'opt_pyramids', default=0, type=click.IntRange(0, 4),
  help='Number of upscaling pyramids for DLIB detectors')
@click.option('--slice', 'opt_slice', type=(int, int), default=(None, None),
  help='Slice list of files')
@click.option('--display/--no-display', 'opt_display', is_flag=True, default=False,
  help='Display detections to debug')
@click.pass_context
def cli(ctx, opt_dir_in, opt_fp_out, opt_ext, opt_size, opt_detector_type,
  opt_gpu, opt_conf_thresh, opt_pyramids, opt_slice, opt_display):
  """Extract face ROIs"""

  import sys
  import os
  from os.path import join
  from pathlib import Path
  from glob import glob
  from tqdm import tqdm
  import numpy as np
  import dlib  # must keep a local reference for dlib
  import cv2 as cv
  import pandas as pd

  from app.utils import logger_utils, file_utils, im_utils
  from app.processors import face_detector

  # -------------------------------------------------
  # init here

  log = logger_utils.Logger.getLogger()

  if opt_detector_type == types.FaceDetectNet.CVDNN:
    detector = face_detector.DetectorCVDNN()
  elif opt_detector_type == types.FaceDetectNet.DLIB_CNN:
    detector = face_detector.DetectorDLIBCNN(opt_gpu)
  elif opt_detector_type == types.FaceDetectNet.DLIB_HOG:
    detector = face_detector.DetectorDLIBHOG()
  elif opt_detector_type == types.FaceDetectNet.HAAR:
    log.error('{} not yet implemented'.format(opt_detector_type.name))
    return

  # -------------------------------------------------
  # process here

  # get list of files to process; slicing with the default (None, None) is a no-op
  fp_ims = glob(join(opt_dir_in, '*.{}'.format(opt_ext)))
  fp_ims = fp_ims[opt_slice[0]:opt_slice[1]]
  log.debug('processing {:,} files'.format(len(fp_ims)))

  data = []

  for fp_im in tqdm(fp_ims):
    im = cv.imread(fp_im)
    bboxes = detector.detect(im, opt_size=opt_size, opt_pyramids=opt_pyramids)
    fpp_im = Path(fp_im)
    for bbox in bboxes:
      # store normalized bbox coordinates keyed by filename
      roi = {
        'fn': fpp_im.stem,
        'ext': fpp_im.suffix,
        'x': bbox.x,
        'y': bbox.y,
        'w': bbox.w,
        'h': bbox.h}
      data.append(roi)

    # debug display
    if opt_display and len(bboxes):
      im_md = im_utils.resize(im, width=opt_size[0])
      for bbox in bboxes:
        dim = bbox.to_dim(im_md.shape[:2][::-1])
        cv.rectangle(im_md, dim.pt_tl, dim.pt_br, (0, 255, 0), 3)
      cv.imshow('', im_md)
      while True:
        k = cv.waitKey(1) & 0xFF
        if k == 27 or k == ord('q'):  # ESC or q to quit
          cv.destroyAllWindows()
          sys.exit()
        elif k != 255:
          # any other key to continue
          break

  # save data
  df = pd.DataFrame.from_dict(data)
  df.to_csv(opt_fp_out)
\ No newline at end of file
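The `--pyramids` option corresponds to dlib's upsample count: each upsample doubles the image before detection so smaller faces become findable, at a steep cost in time. A minimal standalone sketch with dlib's HOG detector, independent of this repo's `face_detector` wrappers (`face.jpg` is a placeholder path):

```python
import dlib
import cv2 as cv

detector = dlib.get_frontal_face_detector()  # HOG-based frontal face detector
im = cv.cvtColor(cv.imread('face.jpg'), cv.COLOR_BGR2RGB)  # dlib expects RGB

# second argument = number of times to upsample (the --pyramids value above)
for pyramids in range(3):
    rects = detector(im, pyramids)
    print('upsample={} -> {} face(s)'.format(pyramids, len(rects)))
```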
diff --git a/megapixels/datasets/commands/resize.py b/megapixels/datasets/commands/resize.py new file mode 100644 index 00000000..5e2d31aa --- /dev/null +++ b/megapixels/datasets/commands/resize.py @@ -0,0 +1,81 @@
"""
Resize images to prepare for training
"""

import click

from app.settings import types
from app.utils import click_utils
from app.settings import app_cfg as cfg

"""
Pillow resampling filters (quality down/up vs. speed):

Filter    Q-Down  Q-Up   Speed
NEAREST                  ⭐⭐⭐⭐⭐
BOX       ⭐              ⭐⭐⭐⭐
BILINEAR  ⭐       ⭐      ⭐⭐⭐
HAMMING   ⭐⭐             ⭐⭐⭐
BICUBIC   ⭐⭐⭐     ⭐⭐⭐    ⭐⭐
LANCZOS   ⭐⭐⭐⭐    ⭐⭐⭐⭐   ⭐
"""

@click.command()
@click.option('-i', '--input', 'opt_dir_in', required=True,
  help='Input directory')
@click.option('-o', '--output', 'opt_dir_out', required=True,
  help='Output directory')
@click.option('-e', '--ext', 'opt_glob_ext',
  default='jpg', type=click.Choice(['jpg', 'png']),
  help='File glob ext')
@click.option('--size', 'opt_size',
  type=(int, int), default=(256, 256),
  help='Output image size (square)')
@click.option('--method', 'opt_scale_method',
  type=click.Choice(['LANCZOS', 'BICUBIC', 'HAMMING', 'BILINEAR', 'BOX', 'NEAREST']),
  default='LANCZOS',
  help='Scaling method to use')
@click.pass_context
def cli(ctx, opt_dir_in, opt_dir_out, opt_glob_ext, opt_size, opt_scale_method):
  """Resize images"""

  import os
  from os.path import join
  from pathlib import Path
  from glob import glob
  from tqdm import tqdm
  from PIL import Image, ImageOps, ImageFilter
  from app.utils import logger_utils, file_utils, im_utils

  # -------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()

  methods = {
    'LANCZOS': Image.LANCZOS,
    'BICUBIC': Image.BICUBIC,
    'HAMMING': Image.HAMMING,
    'BILINEAR': Image.BILINEAR,
    'BOX': Image.BOX,
    'NEAREST': Image.NEAREST
  }

  # -------------------------------------------------
  # process here

  # get list of files to process
  fp_ims = glob(join(opt_dir_in, '*.{}'.format(opt_glob_ext)))
  log.info('processing {:,} files'.format(len(fp_ims)))

  # set scale method
  scale_method = methods[opt_scale_method]

  # ensure output dir exists
  file_utils.mkdirs(opt_dir_out)

  # resize and save images
  for fp_im in tqdm(fp_ims):
    im = Image.open(fp_im)
    im = ImageOps.fit(im, opt_size, method=scale_method, centering=(0.5, 0.5))
    fp_out = join(opt_dir_out, Path(fp_im).name)
    im.save(fp_out)
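A quick way to sanity-check the filter speed column above before batch-resizing thousands of crops (pure Pillow; timings are machine-dependent):

```python
import time
from PIL import Image

im = Image.new('RGB', (2048, 2048))
for name in ('NEAREST', 'BILINEAR', 'BICUBIC', 'LANCZOS'):
    method = getattr(Image, name)
    t0 = time.time()
    for _ in range(20):
        im.resize((256, 256), resample=method)
    print('{:<9s} {:.3f}s'.format(name, time.time() - t0))
```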
diff --git a/notes.md b/notes.md new file mode 100644 index 00000000..9dcf3da6 --- /dev/null +++ b/notes.md @@ -0,0 +1,70 @@
PATH=/home/adam/torch/install/bin:/home/adam/anaconda3/bin:/home/adam/anaconda3/envs/megapixels/bin:/home/adam/anaconda3/bin:/home/adam/.nvm/versions/node/v9.9.0/bin:/home/adam/bin:/home/adam/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/usr/lib/nvidia/:/usr/local/cuda/bin:/usr/lib/nvidia/:/usr/local/cuda/bin

PATH=/home/adam/anaconda3/bin:/home/adam/.nvm/versions/node/v9.9.0/bin:/home/adam/code/google-cloud-sdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/usr/local/cuda/bin

CUDA_HOME=/usr/local/cuda
LD_LIBRARY_PATH=/home/adam/torch/install/lib::/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64

LD_LIBRARY_PATH=:/usr/local/cuda/lib64
CUDA_HOME=/usr/local/cuda

TORCH_NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__

export PATH=/usr/local/cuda/bin:"$PATH"
./clean.sh
export TORCH_NVCC_FLAGS="-D__CUDA_NO_HALF_OPERATORS__"
./install.sh

find . -name "*.JPEG" | xargs -I {} convert {} -resize "256^>" {}

find . -name \*.png -exec identify -ping {} \; -or -exec echo {} \;
find . -name \*.jpg -exec identify -ping {} \; -or -exec rm -f {} \;

luarocks install cudnn
luarocks install display

scp undisclosed:/home/adam/FIDs.zip .
unzip -q FIDs.zip
find FIDs_NEW -name \*.jpg > list.txt
mkdir -p /work/megapixels_dev/3rdparty/art-DCGAN/fiw/images/

while read -r line; do dst=/work/megapixels_dev/3rdparty/art-DCGAN/hipsterwars/images/$(basename "$line"); src=`pwd`/$line; ln -s $src $dst; done < list.txt

extension="${filename##*.}"

filename="${filename%.*}"

for d in $(find source -type d)
  do
    ls $d/*.bin 1>/dev/null 2>&1 && ln -s $d/*.bin target/$(basename $d).dat; done

gpu=0 batchSize=1 imsize=10 noisemode=linefull net=bedrooms_4_net_G.t7 th generate.lua

DATA_ROOT=fiw dataset=folder ndf=50 ngf=150 name=fiw_01 nThreads=6 gpu=2 th main.lua

DATA_ROOT=megaface_13 dataset=folder ndf=50 ngf=150 name=megaface_13 nThreads=6 gpu=1 th main.lua

DATA_ROOT=hipsterwars dataset=folder ndf=50 ngf=150 name=hipsterwars nThreads=6 gpu=2 th main.lua

export PATH=/usr/local/cuda/bin/:$PATH

export LD_LIBRARY_PATH=/usr/local/cuda/lib64/:$LD_LIBRARY_PATH

git clone https://github.com/soumith/cudnn.torch.git -b R7 && cd cudnn.torch && luarocks make cudnn-scm-1.rockspec

scp undisclosed:/home/adam/hipsterwars_v1.0.zip .
find . -name "*.jpg" -print0 | xargs -0 mogrify -flop

https://github.com/facebookresearch/deepcluster

DATA_ROOT=hipsterwars dataset=folder ndf=100 ngf=200 batchSize=128 name=hipsterwars_d100_g200_b128 nThreads=8 gpu=2 th main.lua && DATA_ROOT=hipsterwars dataset=folder ndf=100 ngf=200 batchSize=64 name=hipsterwars_d100_g200_b64 nThreads=8 gpu=2 th main.lua && DATA_ROOT=hipsterwars dataset=folder ndf=100 ngf=300 batchSize=128 name=hipsterwars_d100_g300_b128 nThreads=8 gpu=2 th main.lua && DATA_ROOT=hipsterwars dataset=folder ndf=100 ngf=300 batchSize=64 name=hipsterwars_d100_g300_b64 nThreads=8 gpu=2 th main.lua
\ No newline at end of file
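The DCGAN training invocations above pass hyperparameters via environment variables; when sweeping them, a small Python launcher keeps the combinations straight. A sketch only, assuming `th main.lua` and the `DATA_ROOT` layout from the notes; the sweep values are illustrative:

```python
import os
import subprocess

base = {'DATA_ROOT': 'hipsterwars', 'dataset': 'folder', 'nThreads': '8', 'gpu': '2'}
for ndf, ngf, batch in [(100, 200, 128), (100, 200, 64), (100, 300, 128), (100, 300, 64)]:
    env = dict(os.environ, **base,
               ndf=str(ndf), ngf=str(ngf), batchSize=str(batch),
               name='hipsterwars_d{}_g{}_b{}'.format(ndf, ngf, batch))
    subprocess.run(['th', 'main.lua'], env=env, check=True)
```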
diff --git a/notes/cheat-sheets/bash-cheat-sheet.sh b/notes/cheat-sheets/bash-cheat-sheet.sh new file mode 100644 index 00000000..dee67440 --- /dev/null +++ b/notes/cheat-sheets/bash-cheat-sheet.sh @@ -0,0 +1,410 @@
# bash cheat sheet

#!/bin/bash
#####################################################
# Name: Bash CheatSheet for Mac OSX
#
# A little overlook of the Bash basics
#
# Usage:
#
# Author: J. Le Coupanec
# Date: 2014/11/04
#####################################################


# 0. Shortcuts.


CTRL+A  # move to beginning of line
CTRL+B  # moves backward one character
CTRL+C  # halts the current command
CTRL+D  # deletes one character backward or logs out of current session, similar to exit
CTRL+E  # moves to end of line
CTRL+F  # moves forward one character
CTRL+G  # aborts the current editing command and rings the terminal bell
CTRL+J  # same as RETURN
CTRL+K  # deletes (kill) forward to end of line
CTRL+L  # clears screen and redisplays the line
CTRL+M  # same as RETURN
CTRL+N  # next line in command history
CTRL+O  # same as RETURN, then displays next line in history file
CTRL+P  # previous line in command history
CTRL+R  # searches backward
CTRL+S  # searches forward
CTRL+T  # transposes two characters
CTRL+U  # kills backward from point to the beginning of line
CTRL+V  # makes the next character typed verbatim
CTRL+W  # kills the word behind the cursor
CTRL+X  # lists the possible filename completions of the current word
CTRL+Y  # retrieves (yank) last item killed
CTRL+Z  # stops the current command, resume with fg in the foreground or bg in the background

DELETE  # deletes one character backward
!!      # repeats the last command
exit    # logs out of current session


# 1. Bash Basics.


export              # displays all environment variables

echo $SHELL         # displays the shell you're using
echo $BASH_VERSION  # displays bash version

bash                # if you want to use bash (type exit to go back to your normal shell)
whereis bash        # finds out where bash is on your system

clear               # clears content on window (hide displayed lines)
# 1.1. File Commands.


ls                            # lists your files
ls -l                         # lists your files in 'long format', which contains the exact size of the file, who owns the file, who has the right to look at it, and when it was last modified
ls -a                         # lists all files, including hidden files
ln -s <filename> <link>       # creates symbolic link to file
touch <filename>              # creates or updates your file
cat > <filename>              # places standard input into file
more <filename>               # shows the first part of a file (move with space and type q to quit)
head <filename>               # outputs the first 10 lines of file
tail <filename>               # outputs the last 10 lines of file (useful with -f option)
emacs <filename>              # lets you create and edit a file
mv <filename1> <filename2>    # moves a file
cp <filename1> <filename2>    # copies a file
rm <filename>                 # removes a file
diff <filename1> <filename2>  # compares files, and shows where they differ
wc <filename>                 # tells you how many lines, words and characters there are in a file
chmod -options <filename>     # lets you change the read, write, and execute permissions on your files
gzip <filename>               # compresses files
gunzip <filename>             # uncompresses files compressed by gzip
gzcat <filename>              # lets you look at gzipped file without actually having to gunzip it
lpr <filename>                # print the file
lpq                           # check out the printer queue
lprm <jobnumber>              # remove something from the printer queue
genscript                     # converts plain text files into postscript for printing and gives you some options for formatting
dvips <filename>              # print .dvi files (i.e. files produced by LaTeX)
grep <pattern> <filenames>    # looks for the string in the files
grep -r <pattern> <dir>       # search recursively for pattern in directory


# 1.2. Directory Commands.


mkdir <dirname>  # makes a new directory
cd               # changes to home
cd <dirname>     # changes directory
pwd              # tells you where you currently are


# 1.3. SSH, System Info & Network Commands.


ssh user@host            # connects to host as user
ssh -p <port> user@host  # connects to host on specified port as user
ssh-copy-id user@host    # adds your ssh key to host for user to enable a keyed or passwordless login

whoami                   # returns your username
passwd                   # lets you change your password
quota -v                 # shows what your disk quota is
date                     # shows the current date and time
cal                      # shows the month's calendar
uptime                   # shows current uptime
w                        # displays who is online
finger <user>            # displays information about user
uname -a                 # shows kernel information
man <command>            # shows the manual for specified command
df                       # shows disk usage
du <filename>            # shows the disk usage of the files and directories in filename (du -s gives only a total)
last <yourUsername>      # lists your last logins
ps -u yourusername       # lists your processes
kill <PID>               # kills (ends) the processes with the ID you gave
killall <processname>    # kills all processes with the name
top                      # displays your currently active processes
bg                       # lists stopped or background jobs; resume a stopped job in the background
fg                       # brings the most recent job in the foreground
fg <job>                 # brings job to the foreground

ping <host>              # pings host and outputs results
whois <domain>           # gets whois information for domain
dig <domain>             # gets DNS information for domain
dig -x <host>            # reverse lookup of host
wget <file>              # downloads file
# 2. Basic Shell Programming.


# 2.1. Variables.


varname=value                # defines a variable
varname=value command        # defines a variable to be in the environment of a particular subprocess
echo $varname                # checks a variable's value
echo $$                      # prints process ID of the current shell
echo $!                      # prints process ID of the most recently invoked background job
echo $?                      # displays the exit status of the last command
export VARNAME=value         # defines an environment variable (will be available in subprocesses)

array[0]=val                 # several ways to define an array
array[1]=val
array[2]=val
array=([2]=val [0]=val [1]=val)
array=(val val val)

${array[i]}                  # displays array's value for this index. If no index is supplied, array element 0 is assumed
${#array[i]}                 # to find out the length of any element in the array
${#array[@]}                 # to find out how many values there are in the array

declare -a                   # the variables are treated as arrays
declare -f                   # uses function names only
declare -F                   # displays function names without definitions
declare -i                   # the variables are treated as integers
declare -r                   # makes the variables read-only
declare -x                   # marks the variables for export via the environment

${varname:-word}             # if varname exists and isn't null, return its value; otherwise return word
${varname:=word}             # if varname exists and isn't null, return its value; otherwise set it to word and then return its value
${varname:?message}          # if varname exists and isn't null, return its value; otherwise print varname, followed by message, and abort the current command or script
${varname:+word}             # if varname exists and isn't null, return word; otherwise return null
${varname:offset:length}     # performs substring expansion. It returns the substring of $varname starting at offset and up to length characters

${variable#pattern}          # if the pattern matches the beginning of the variable's value, delete the shortest part that matches and return the rest
${variable##pattern}         # if the pattern matches the beginning of the variable's value, delete the longest part that matches and return the rest
${variable%pattern}          # if the pattern matches the end of the variable's value, delete the shortest part that matches and return the rest
${variable%%pattern}         # if the pattern matches the end of the variable's value, delete the longest part that matches and return the rest
${variable/pattern/string}   # the longest match to pattern in variable is replaced by string. Only the first match is replaced
${variable//pattern/string}  # the longest match to pattern in variable is replaced by string. All matches are replaced

${#varname}                  # returns the length of the value of the variable as a character string

*(patternlist)               # matches zero or more occurrences of the given patterns
+(patternlist)               # matches one or more occurrences of the given patterns
?(patternlist)               # matches zero or one occurrence of the given patterns
@(patternlist)               # matches exactly one of the given patterns
!(patternlist)               # matches anything except one of the given patterns

$(UNIX command)              # command substitution: runs the command and returns standard output


# 2.2. Functions.
# The function refers to passed arguments by position (as if they were positional parameters), that is, $1, $2, and so forth.
# $@ is equal to "$1" "$2"... "$N", where N is the number of positional parameters. $# holds the number of positional parameters.


functname() {
  shell commands
}

unset -f functname  # deletes a function definition
declare -f          # displays all defined functions in your login session
# 2.3. Flow Control.


statement1 && statement2  # and operator
statement1 || statement2  # or operator

-a                        # and operator inside a test conditional expression
-o                        # or operator inside a test conditional expression

str1=str2                 # str1 matches str2
str1!=str2                # str1 does not match str2
str1<str2                 # str1 is less than str2
str1>str2                 # str1 is greater than str2
-n str1                   # str1 is not null (has length greater than 0)
-z str1                   # str1 is null (has length 0)

-a file                   # file exists
-d file                   # file exists and is a directory
-e file                   # file exists; same as -a
-f file                   # file exists and is a regular file (i.e., not a directory or other special type of file)
-r file                   # you have read permission
-s file                   # file exists and is not empty
-w file                   # you have write permission
-x file                   # you have execute permission on file, or directory search permission if it is a directory
-N file                   # file was modified since it was last read
-O file                   # you own file
-G file                   # file's group ID matches yours (or one of yours, if you are in multiple groups)
file1 -nt file2           # file1 is newer than file2
file1 -ot file2           # file1 is older than file2

-lt                       # less than
-le                       # less than or equal
-eq                       # equal
-ge                       # greater than or equal
-gt                       # greater than
-ne                       # not equal

if condition
then
  statements
[elif condition
  then statements...]
[else
  statements]
fi

for x in {1..10}
do
  statements
done

for name [in list]
do
  statements that can use $name
done

for (( initialisation ; ending condition ; update ))
do
  statements...
done

case expression in
  pattern1 )
    statements ;;
  pattern2 )
    statements ;;
  ...
esac

select name [in list]
do
  statements that can use $name
done

while condition; do
  statements
done

until condition; do
  statements
done


# 3. Command-Line Processing Cycle.


# The default order for command lookup is functions, followed by built-ins, with scripts and executables last.
# There are three built-ins that you can use to override this order: `command`, `builtin` and `enable`.

command  # removes alias and function lookup. Only built-ins and commands found in the search path are executed
builtin  # looks up only built-in commands, ignoring functions and commands found in PATH
enable   # enables and disables shell built-ins

eval     # takes arguments and runs them through the command-line processing steps all over again
# 4. Input/Output Redirectors.


cmd1|cmd2  # pipe; takes standard output of cmd1 as standard input to cmd2
> file     # directs standard output to file
< file     # takes standard input from file
>> file    # directs standard output to file; append to file if it already exists
>|file     # forces standard output to file even if noclobber is set
n>|file    # forces output to file from file descriptor n even if noclobber is set
<> file    # uses file as both standard input and standard output
n<>file    # uses file as both input and output for file descriptor n
<<label    # here-document
n>file     # directs file descriptor n to file
n<file     # takes file descriptor n from file
n>>file    # directs file descriptor n to file; append to file if it already exists
n>&        # duplicates standard output to file descriptor n
n<&        # duplicates standard input from file descriptor n
n>&m       # file descriptor n is made to be a copy of the output file descriptor
n<&m       # file descriptor n is made to be a copy of the input file descriptor
&>file     # directs standard output and standard error to file
<&-        # closes the standard input
>&-        # closes the standard output
n>&-       # closes the output from file descriptor n
n<&-       # closes the input from file descriptor n


# 5. Process Handling.


# To suspend a job, type CTRL+Z while it is running. You can also suspend a job with CTRL+Y.
# This is slightly different from CTRL+Z in that the process is only stopped when it attempts to read input from terminal.
# Of course, to interrupt a job, type CTRL+C.

myCommand &         # runs job in the background and returns the shell prompt

jobs                # lists all jobs (use with -l to see associated PID)

fg                  # brings a background job into the foreground
fg %+               # brings most recently invoked background job
fg %-               # brings second most recently invoked background job
fg %N               # brings job number N
fg %string          # brings job whose command begins with string
fg %?string         # brings job whose command contains string

kill -l             # returns a list of all signals on the system, by name and number
kill PID            # terminates process with specified PID

ps                  # prints a line of information about the current running login shell and any processes running under it
ps -a               # selects all processes with a tty except session leaders

trap cmd sig1 sig2  # executes a command when a signal is received by the script
trap "" sig1 sig2   # ignores those signals
trap - sig1 sig2    # resets the action taken when the signal is received to the default

disown <PID|JID>    # removes the process from the list of jobs

wait                # waits until all background jobs have finished


# 6. Tips and Tricks.


# set an alias
cd; nano .bash_profile
> alias gentlenode='ssh admin@gentlenode.com -p 3404'  # add your alias in .bash_profile

# to quickly go to a specific directory
cd; nano .bashrc
> shopt -s cdable_vars
> export websites="/Users/mac/Documents/websites"

source .bashrc
cd websites


# 7. Debugging Shell Programs.


bash -n scriptname  # don't run commands; check for syntax errors only
set -o noexec       # alternative (set option in script)

bash -v scriptname  # echo commands before running them
set -o verbose      # alternative (set option in script)

bash -x scriptname  # echo commands after command-line processing
set -o xtrace       # alternative (set option in script)

trap 'echo $varname' EXIT  # useful when you want to print out the values of variables at the point that your script exits

function errtrap {
  es=$?
  echo "ERROR line $1: Command exited with status $es."
}
trap 'errtrap $LINENO' ERR  # is run whenever a command in the surrounding script or function exits with non-zero status

function dbgtrap {
  echo "badvar is $badvar"
}

trap dbgtrap DEBUG  # causes the trap code to be executed before every statement in a function or script
# ...section of code in which the problem occurs...
trap - DEBUG  # turn off the DEBUG trap

function returntrap {
  echo "A return occurred"
}

trap returntrap RETURN  # is executed each time a shell function or a script executed with the . or source commands finishes executing

diff --git a/notes/cheat-sheets/docker-cheat-sheet.md b/notes/cheat-sheets/docker-cheat-sheet.md new file mode 100644 index 00000000..4e11e24f --- /dev/null +++ b/notes/cheat-sheets/docker-cheat-sheet.md @@ -0,0 +1,795 @@
# Docker Cheat Sheet

Source: <https://github.com/wsargent/docker-cheat-sheet/blob/master/README.md>


## Table of Contents

* [Why Docker](#why-docker)
* [Prerequisites](#prerequisites)
* [Installation](#installation)
* [Containers](#containers)
* [Images](#images)
* [Networks](#networks)
* [Registry and Repository](#registry--repository)
* [Dockerfile](#dockerfile)
* [Layers](#layers)
* [Links](#links)
* [Volumes](#volumes)
* [Exposing Ports](#exposing-ports)
* [Best Practices](#best-practices)
* [Security](#security)
* [Tips](#tips)
* [Contributing](#contributing)

## Why Docker

"With Docker, developers can build any app in any language using any toolchain. “Dockerized” apps are completely portable and can run anywhere - colleagues’ OS X and Windows laptops, QA servers running Ubuntu in the cloud, and production data center VMs running Red Hat.

Developers can get going quickly by starting with one of the 13,000+ apps available on Docker Hub. Docker manages and tracks changes and dependencies, making it easier for sysadmins to understand how the apps that developers build work. And with Docker Hub, developers can automate their build pipeline and share artifacts with collaborators through public or private repositories.

Docker helps developers build and ship higher-quality applications, faster." -- [What is Docker](https://www.docker.com/what-docker#copy1)

## Prerequisites

I use [Oh My Zsh](https://github.com/robbyrussell/oh-my-zsh) with the [Docker plugin](https://github.com/robbyrussell/oh-my-zsh/wiki/Plugins#docker) for autocompletion of docker commands. YMMV.

### Linux

The 3.10.x kernel is [the minimum requirement](https://docs.docker.com/engine/installation/binaries/#check-kernel-dependencies) for Docker.

### MacOS

10.8 “Mountain Lion” or newer is required.

## Installation

### Linux

Quick and easy install script provided by Docker:

```
curl -sSL https://get.docker.com/ | sh
```

If you're not willing to run a random shell script, please see the [installation](https://docs.docker.com/engine/installation/linux/) instructions for your distribution.

If you are a complete Docker newbie, you should follow the [series of tutorials](https://docs.docker.com/engine/getstarted/) now.

### macOS

Download and install [Docker Community Edition](https://www.docker.com/community-edition). If you have Homebrew-Cask, just type `brew cask install docker`. Or download and install [Docker Toolbox](https://docs.docker.com/toolbox/overview/). [Docker For Mac](https://docs.docker.com/docker-for-mac/) is nice, but it's not quite as finished as the VirtualBox install. [See the comparison](https://docs.docker.com/docker-for-mac/docker-toolbox/).
> **NOTE** Docker Toolbox is legacy. You should use Docker Community Edition instead; see [Docker Toolbox](https://docs.docker.com/toolbox/overview/).

Once you've installed Docker Community Edition, click the docker icon in Launchpad. Then start up a container:

```
docker run hello-world
```

That's it, you have a running Docker container.

If you are a complete Docker newbie, you should probably follow the [series of tutorials](https://docs.docker.com/engine/getstarted/) now.

## Containers

[Your basic isolated Docker process](http://etherealmind.com/basics-docker-containers-hypervisors-coreos/). Containers are to Virtual Machines as threads are to processes. Or you can think of them as chroots on steroids.

### Lifecycle

* [`docker create`](https://docs.docker.com/engine/reference/commandline/create) creates a container but does not start it.
* [`docker rename`](https://docs.docker.com/engine/reference/commandline/rename/) allows the container to be renamed.
* [`docker run`](https://docs.docker.com/engine/reference/commandline/run) creates and starts a container in one operation.
* [`docker rm`](https://docs.docker.com/engine/reference/commandline/rm) deletes a container.
* [`docker update`](https://docs.docker.com/engine/reference/commandline/update/) updates a container's resource limits.

Normally if you run a container without options it will start and stop immediately. If you want to keep it running, you can use the command `docker run -td container_id`: the `-t` option allocates a pseudo-TTY session and `-d` automatically detaches the container (runs it in the background and prints the container ID).

If you want a transient container, `docker run --rm` will remove the container after it stops.

If you want to map a directory on the host to a docker container, `docker run -v $HOSTDIR:$DOCKERDIR`. Also see [Volumes](https://github.com/wsargent/docker-cheat-sheet/#volumes).

If you want to also remove the volumes associated with the container, the deletion of the container must include the `-v` switch, as in `docker rm -v`.

There's also a [logging driver](https://docs.docker.com/engine/admin/logging/overview/) available for individual containers in docker 1.10. To run docker with a custom log driver (i.e., to syslog), use `docker run --log-driver=syslog`.

Another useful option is `docker run --name yourname docker_image`: when you specify `--name` inside the run command, you can start and stop the container by calling it with the name that you specified when you created it.

### Starting and Stopping

* [`docker start`](https://docs.docker.com/engine/reference/commandline/start) starts a container so it is running.
* [`docker stop`](https://docs.docker.com/engine/reference/commandline/stop) stops a running container.
* [`docker restart`](https://docs.docker.com/engine/reference/commandline/restart) stops and starts a container.
* [`docker pause`](https://docs.docker.com/engine/reference/commandline/pause/) pauses a running container, "freezing" it in place.
* [`docker unpause`](https://docs.docker.com/engine/reference/commandline/unpause/) will unpause a running container.
* [`docker wait`](https://docs.docker.com/engine/reference/commandline/wait) blocks until running container stops.
* [`docker kill`](https://docs.docker.com/engine/reference/commandline/kill) sends a SIGKILL to a running container.
* [`docker attach`](https://docs.docker.com/engine/reference/commandline/attach) will connect to a running container.

If you want to integrate a container with a [host process manager](https://docs.docker.com/engine/admin/host_integration/), start the daemon with `-r=false` then use `docker start -a`.

If you want to expose container ports through the host, see the [exposing ports](#exposing-ports) section.

Restart policies on crashed docker instances are [covered here](http://container42.com/2014/09/30/docker-restart-policies/).
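If you drive these lifecycle commands from a script instead of the shell, the same operations exist in the Docker SDK for Python (`pip install docker`). A minimal sketch, assuming a local Docker daemon is running:

```python
import docker

client = docker.from_env()  # talks to the local Docker daemon

# equivalent of: docker run -d --name web nginx
c = client.containers.run('nginx', name='web', detach=True)

c.stop()                 # docker stop web
c.start()                # docker start web
print(c.logs()[-200:])   # docker logs web (last bytes)
c.remove(force=True)     # docker rm -f web
```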
#### CPU Constraints

You can limit CPU, either using a percentage of all CPUs, or by using specific cores.

For example, you can use the [`cpu-shares`](https://docs.docker.com/engine/reference/run/#/cpu-share-constraint) setting. The setting is a bit strange -- 1024 means 100% of the CPU, so if you want the container to take 50% of all CPU cores, you should specify 512. See https://goldmann.pl/blog/2014/09/11/resource-management-in-docker/#_cpu for more:

```
docker run -ti -c 512 agileek/cpuset-test
```

You can also only use some CPU cores using [`cpuset-cpus`](https://docs.docker.com/engine/reference/run/#/cpuset-constraint). See https://agileek.github.io/docker/2014/08/06/docker-cpuset/ for details and some nice videos:

```
docker run -ti --cpuset-cpus=0,4,6 agileek/cpuset-test
```

Note that Docker can still **see** all of the CPUs inside the container -- it just isn't using all of them. See https://github.com/docker/docker/issues/20770 for more details.

#### Memory Constraints

You can also set [memory constraints](https://docs.docker.com/engine/reference/run/#/user-memory-constraints) on Docker:

```
docker run -it -m 300M ubuntu:14.04 /bin/bash
```

#### Capabilities

Linux capabilities can be set by using `cap-add` and `cap-drop`. See https://docs.docker.com/engine/reference/run/#/runtime-privilege-and-linux-capabilities for details. This should be used for greater security.

To mount a FUSE based filesystem, you need to combine both --cap-add and --device:

```
docker run --rm -it --cap-add SYS_ADMIN --device /dev/fuse sshfs
```

Give access to a single device:

```
docker run -it --device=/dev/ttyUSB0 debian bash
```

Give access to all devices:

```
docker run -it --privileged -v /dev/bus/usb:/dev/bus/usb debian bash
```

More info about privileged containers [here](https://docs.docker.com/engine/reference/run/#/runtime-privilege-and-linux-capabilities).


### Info

* [`docker ps`](https://docs.docker.com/engine/reference/commandline/ps) shows running containers.
* [`docker logs`](https://docs.docker.com/engine/reference/commandline/logs) gets logs from container. (You can use a custom log driver, but logs is only available for `json-file` and `journald` in 1.10.)
* [`docker inspect`](https://docs.docker.com/engine/reference/commandline/inspect) looks at all the info on a container (including IP address).
* [`docker events`](https://docs.docker.com/engine/reference/commandline/events) gets events from container.
* [`docker port`](https://docs.docker.com/engine/reference/commandline/port) shows public facing port of container.
* [`docker top`](https://docs.docker.com/engine/reference/commandline/top) shows running processes in container.
* [`docker stats`](https://docs.docker.com/engine/reference/commandline/stats) shows containers' resource usage statistics.
* [`docker diff`](https://docs.docker.com/engine/reference/commandline/diff) shows changed files in the container's FS.
`docker ps -a` shows running and stopped containers.

`docker stats --all` shows a running list of containers.

### Import / Export

* [`docker cp`](https://docs.docker.com/engine/reference/commandline/cp) copies files or folders between a container and the local filesystem.
* [`docker export`](https://docs.docker.com/engine/reference/commandline/export) turns container filesystem into tarball archive stream to STDOUT.

### Executing Commands

* [`docker exec`](https://docs.docker.com/engine/reference/commandline/exec) to execute a command in container.

To enter a running container, attach a new shell process to a running container called foo, use: `docker exec -it foo /bin/bash`.

## Images

Images are just [templates for docker containers](https://docs.docker.com/engine/understanding-docker/#how-does-a-docker-image-work).

### Lifecycle

* [`docker images`](https://docs.docker.com/engine/reference/commandline/images) shows all images.
* [`docker import`](https://docs.docker.com/engine/reference/commandline/import) creates an image from a tarball.
* [`docker build`](https://docs.docker.com/engine/reference/commandline/build) creates image from Dockerfile.
* [`docker commit`](https://docs.docker.com/engine/reference/commandline/commit) creates image from a container, pausing it temporarily if it is running.
* [`docker rmi`](https://docs.docker.com/engine/reference/commandline/rmi) removes an image.
* [`docker load`](https://docs.docker.com/engine/reference/commandline/load) loads an image from a tar archive as STDIN, including images and tags (as of 0.7).
* [`docker save`](https://docs.docker.com/engine/reference/commandline/save) saves an image to a tar archive stream to STDOUT with all parent layers, tags & versions (as of 0.7).

### Info

* [`docker history`](https://docs.docker.com/engine/reference/commandline/history) shows history of image.
* [`docker tag`](https://docs.docker.com/engine/reference/commandline/tag) tags an image to a name (local or registry).

## Checking Docker Version

It is very important to know which version of Docker you are running at any point in time. This tells you which features are compatible with your install, and which template containers from the Docker store you can run.

* [`docker version`](https://docs.docker.com/engine/reference/commandline/version/) checks what version of docker you have running; usage is `docker version [OPTIONS]`.

Get the server version:

```
$ docker version --format '{{.Server.Version}}'
1.8.0
```

Dump raw JSON data:

```
$ docker version --format '{{json .}}'
{"Client":{"Version":"1.8.0","ApiVersion":"1.20","GitCommit":"f5bae0a","GoVersion":"go1.4.2","Os":"linux","Arch":"am"}
```

### Cleaning up

While you can use the `docker rmi` command to remove specific images, there's a tool called [docker-gc](https://github.com/spotify/docker-gc) that will safely clean up images that are no longer used by any containers.
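If you'd rather script the cleanup than run docker-gc, the Docker SDK for Python exposes the same pruning operations. A minimal sketch (assumes `pip install docker` and a local daemon):

```python
import docker

client = docker.from_env()

# remove stopped containers, then dangling (untagged) images
removed = client.containers.prune()
print(removed.get('ContainersDeleted'))

pruned = client.images.prune(filters={'dangling': True})
print(pruned.get('SpaceReclaimed'), 'bytes reclaimed')
```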
### Load/Save image

Load an image from file:

```
docker load < my_image.tar.gz
```

Save an existing image:

```
docker save my_image:my_tag | gzip > my_image.tar.gz
```

### Import/Export container

Import a container as an image from file:

```
cat my_container.tar.gz | docker import - my_image:my_tag
```

Export an existing container:

```
docker export my_container | gzip > my_container.tar.gz
```

### Difference between loading a saved image and importing an exported container as an image

Loading an image using the `load` command creates a new image including its history.
Importing a container as an image using the `import` command creates a new image excluding the history, which results in a smaller image size compared to loading an image.

## Networks

Docker has a [networks](https://docs.docker.com/engine/userguide/networking/) feature. Not much is known about it, so this is a good place to expand the cheat sheet. There is a note saying that it's a good way to configure docker containers to talk to each other without using ports. See [working with networks](https://docs.docker.com/engine/userguide/networking/work-with-networks/) for more details.

### Lifecycle

* [`docker network create`](https://docs.docker.com/engine/reference/commandline/network_create/)
* [`docker network rm`](https://docs.docker.com/engine/reference/commandline/network_rm/)

### Info

* [`docker network ls`](https://docs.docker.com/engine/reference/commandline/network_ls/)
* [`docker network inspect`](https://docs.docker.com/engine/reference/commandline/network_inspect/)

### Connection

* [`docker network connect`](https://docs.docker.com/engine/reference/commandline/network_connect/)
* [`docker network disconnect`](https://docs.docker.com/engine/reference/commandline/network_disconnect/)

You can specify a [specific IP address for a container](https://blog.jessfraz.com/post/ips-for-all-the-things/):

```
# create a new bridge network with your subnet and gateway for your ip block
$ docker network create --subnet 203.0.113.0/24 --gateway 203.0.113.254 iptastic

# run a nginx container with a specific ip in that block
$ docker run --rm -it --net iptastic --ip 203.0.113.2 nginx

# curl the ip from any other place (assuming this is a public ip block duh)
$ curl 203.0.113.2
```

## Registry & Repository

A repository is a *hosted* collection of tagged images that together create the file system for a container.

A registry is a *host* -- a server that stores repositories and provides an HTTP API for [managing the uploading and downloading of repositories](https://docs.docker.com/engine/tutorials/dockerrepos/).

Docker.com hosts its own [index](https://hub.docker.com/) to a central registry which contains a large number of repositories. Having said that, the central docker registry [does not do a good job of verifying images](https://titanous.com/posts/docker-insecurity) and should be avoided if you're worried about security.

* [`docker login`](https://docs.docker.com/engine/reference/commandline/login) to login to a registry.
* [`docker logout`](https://docs.docker.com/engine/reference/commandline/logout) to logout from a registry.
* [`docker search`](https://docs.docker.com/engine/reference/commandline/search) searches registry for image.
* [`docker pull`](https://docs.docker.com/engine/reference/commandline/pull) pulls an image from registry to local machine.
* [`docker push`](https://docs.docker.com/engine/reference/commandline/push) pushes an image to the registry from local machine.

### Run local registry

You can run a local registry by using the [docker distribution](https://github.com/docker/distribution) project and looking at the [local deploy](https://github.com/docker/docker.github.io/blob/master/registry/deploying.md) instructions.

Also see the [mailing list](https://groups.google.com/a/dockerproject.org/forum/#!forum/distribution).

## Dockerfile

[The configuration file](https://docs.docker.com/engine/reference/builder/). Sets up a Docker container when you run `docker build` on it. Vastly preferable to `docker commit`.

Here are some common text editors and their syntax highlighting modules you could use to create Dockerfiles:

* If you use [jEdit](http://jedit.org), I've put up a syntax highlighting module for [Dockerfile](https://github.com/wsargent/jedit-docker-mode) you can use.
* [Sublime Text 2](https://packagecontrol.io/packages/Dockerfile%20Syntax%20Highlighting)
* [Atom](https://atom.io/packages/language-docker)
* [Vim](https://github.com/ekalinin/Dockerfile.vim)
* [Emacs](https://github.com/spotify/dockerfile-mode)
* [TextMate](https://github.com/docker/docker/tree/master/contrib/syntax/textmate)
* [VS Code](https://github.com/Microsoft/vscode-docker)
* Also see [Docker meets the IDE](https://domeide.github.io/)

### Instructions

* [.dockerignore](https://docs.docker.com/engine/reference/builder/#dockerignore-file)
* [FROM](https://docs.docker.com/engine/reference/builder/#from) sets the base image for subsequent instructions.
* [MAINTAINER (deprecated - use LABEL instead)](https://docs.docker.com/engine/reference/builder/#maintainer-deprecated) sets the Author field of the generated images.
* [RUN](https://docs.docker.com/engine/reference/builder/#run) executes any commands in a new layer on top of the current image and commits the results.
* [CMD](https://docs.docker.com/engine/reference/builder/#cmd) provides defaults for an executing container.
* [EXPOSE](https://docs.docker.com/engine/reference/builder/#expose) informs Docker that the container listens on the specified network ports at runtime. NOTE: does not actually make ports accessible.
* [ENV](https://docs.docker.com/engine/reference/builder/#env) sets environment variable.
* [ADD](https://docs.docker.com/engine/reference/builder/#add) copies new files, directories or remote files to container. Invalidates caches. Avoid `ADD` and use `COPY` instead.
* [COPY](https://docs.docker.com/engine/reference/builder/#copy) copies new files or directories to container. Note that this only copies as root, so you have to chown manually regardless of your USER / WORKDIR setting. See https://github.com/moby/moby/issues/30110
* [ENTRYPOINT](https://docs.docker.com/engine/reference/builder/#entrypoint) configures a container that will run as an executable.
* [VOLUME](https://docs.docker.com/engine/reference/builder/#volume) creates a mount point for externally mounted volumes or other containers.
* [USER](https://docs.docker.com/engine/reference/builder/#user) sets the user name for following RUN / CMD / ENTRYPOINT commands.
* [WORKDIR](https://docs.docker.com/engine/reference/builder/#workdir) sets the working directory.
* [ARG](https://docs.docker.com/engine/reference/builder/#arg) defines a build-time variable.
* [ONBUILD](https://docs.docker.com/engine/reference/builder/#onbuild) adds a trigger instruction when the image is used as the base for another build.
* [STOPSIGNAL](https://docs.docker.com/engine/reference/builder/#stopsignal) sets the system call signal that will be sent to the container to exit.
* [LABEL](https://docs.docker.com/engine/userguide/labels-custom-metadata/) applies key/value metadata to your images, containers, or daemons.

### Tutorial

* [Flux7's Dockerfile Tutorial](http://flux7.com/blogs/docker/docker-tutorial-series-part-3-automation-is-the-word-using-dockerfile/)

### Examples

* [Examples](https://docs.docker.com/engine/reference/builder/#dockerfile-examples)
* [Best practices for writing Dockerfiles](https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/)
* [Michael Crosby](http://crosbymichael.com/) has some more [Dockerfiles best practices](http://crosbymichael.com/dockerfile-best-practices.html) / [take 2](http://crosbymichael.com/dockerfile-best-practices-take-2.html).
* [Building Good Docker Images](http://jonathan.bergknoff.com/journal/building-good-docker-images) / [Building Better Docker Images](http://jonathan.bergknoff.com/journal/building-better-docker-images)
* [Managing Container Configuration with Metadata](https://speakerdeck.com/garethr/managing-container-configuration-with-metadata)
* [How to write excellent Dockerfiles](https://rock-it.pl/how-to-write-excellent-dockerfiles/)

## Layers

The versioned filesystem in Docker is based on layers. They're like [git commits or changesets for filesystems](https://docs.docker.com/engine/userguide/storagedriver/imagesandcontainers/).

## Links

Links are how Docker containers talk to each other [through TCP/IP ports](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/). [Linking into Redis](https://docs.docker.com/engine/examples/running_redis_service/) and [Atlassian](https://blogs.atlassian.com/2013/11/docker-all-the-things-at-atlassian-automation-and-wiring/) show worked examples. You can also resolve [links by hostname](https://docs.docker.com/engine/userguide/networking/default_network/dockerlinks/#/updating-the-etchosts-file).

This has been deprecated to some extent by [user-defined networks](https://docs.docker.com/engine/userguide/networking/#user-defined-networks).

NOTE: If you want containers to ONLY communicate with each other through links, start the docker daemon with `-icc=false` to disable inter process communication.

Suppose you have a container with the name CONTAINER (specified by `docker run --name CONTAINER`) whose Dockerfile exposes a port:

```
EXPOSE 1337
```

Then if we create another container called LINKED like so:

```
docker run -d --link CONTAINER:ALIAS --name LINKED user/wordpress
```

Then the exposed ports and aliases of CONTAINER will show up in LINKED with the following environment variables:

```
$ALIAS_PORT_1337_TCP_PORT
$ALIAS_PORT_1337_TCP_ADDR
```

And you can connect to it that way.

To delete links, use `docker rm --link`.

Generally, linking between docker services is a subset of "service discovery", a big problem if you're planning to use Docker at scale in production. Please read [The Docker Ecosystem: Service Discovery and Distributed Configuration Stores](https://www.digitalocean.com/community/tutorials/the-docker-ecosystem-service-discovery-and-distributed-configuration-stores) for more info.
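Since links are deprecated in favor of user-defined networks, here is the equivalent wiring done with a user-defined bridge via the Docker SDK for Python. A sketch only; the container and network names are arbitrary:

```python
import docker

client = docker.from_env()

# containers on the same user-defined bridge resolve each other by name
net = client.networks.create('appnet', driver='bridge')
client.containers.run('redis', name='cache', network='appnet', detach=True)
client.containers.run('user/wordpress', name='web', network='appnet', detach=True)
# inside 'web', the hostname 'cache' now resolves; no --link required
```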
## Volumes

Docker volumes are [free-floating filesystems](https://docs.docker.com/engine/tutorials/dockervolumes/). They don't have to be connected to a particular container. You should use volumes mounted from [data-only containers](https://medium.com/@ramangupta/why-docker-data-containers-are-good-589b3c6c749e) for portability.

### Lifecycle

* [`docker volume create`](https://docs.docker.com/engine/reference/commandline/volume_create/)
* [`docker volume rm`](https://docs.docker.com/engine/reference/commandline/volume_rm/)

### Info

* [`docker volume ls`](https://docs.docker.com/engine/reference/commandline/volume_ls/)
* [`docker volume inspect`](https://docs.docker.com/engine/reference/commandline/volume_inspect/)

Volumes are useful in situations where you can't use links (which are TCP/IP only). For instance, if you need to have two docker instances communicate by leaving stuff on the filesystem.

You can mount them in several docker containers at once, using `docker run --volumes-from`.

Because volumes are isolated filesystems, they are often used to store state from computations between transient containers. That is, you can have a stateless and transient container run from a recipe, blow it away, and then have a second instance of the transient container pick up from where the last one left off.

See [advanced volumes](http://crosbymichael.com/advanced-docker-volumes.html) for more details. Container42 is [also helpful](http://container42.com/2014/11/03/docker-indepth-volumes/).

You can [map MacOS host directories as docker volumes](https://docs.docker.com/engine/tutorials/dockervolumes/#mount-a-host-directory-as-a-data-volume):

```
docker run -v /Users/wsargent/myapp/src:/src
```

You can use remote NFS volumes if you're [feeling brave](https://docs.docker.com/engine/tutorials/dockervolumes/#/mount-a-shared-storage-volume-as-a-data-volume).

You may also consider running data-only containers as described [here](http://container42.com/2013/12/16/persistent-volumes-with-docker-container-as-volume-pattern/) to provide some data portability.

[Be aware that you can mount files as volumes.](#volumes-can-be-files)

## Exposing ports

Exposing incoming ports through the host container is [fiddly but doable](https://docs.docker.com/engine/reference/run/#expose-incoming-ports).

This is done by mapping the container port to the host port (only using localhost interface) using `-p`:

```
docker run -p 127.0.0.1:$HOSTPORT:$CONTAINERPORT --name CONTAINER -t someimage
```

You can tell Docker that the container listens on the specified network ports at runtime by using [EXPOSE](https://docs.docker.com/engine/reference/builder/#expose):

```
EXPOSE <CONTAINERPORT>
```

Note that EXPOSE does not expose the port itself -- only `-p` will do that. To expose the container's port on your localhost's port:

```
iptables -t nat -A DOCKER -p tcp --dport <LOCALHOSTPORT> -j DNAT --to-destination <CONTAINERIP>:<PORT>
```

If you're running Docker in Virtualbox, you then need to forward the port there as well, using [forwarded_port](https://docs.vagrantup.com/v2/networking/forwarded_ports.html). Define a range of ports in your Vagrantfile like this so you can dynamically map them:

```
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  ...

  (49000..49900).each do |port|
    config.vm.network :forwarded_port, :host => port, :guest => port
  end

  ...
+end
+```
+
+If you forget what you mapped the port to on the host, use `docker port` to show it:
+
+```
+docker port CONTAINER $CONTAINERPORT
+```
+
+## Best Practices
+
+This is where general Docker best practices and war stories go:
+
+* [The Rabbit Hole of Using Docker in Automated Tests](http://gregoryszorc.com/blog/2014/10/16/the-rabbit-hole-of-using-docker-in-automated-tests/)
+* [Bridget Kromhout](https://twitter.com/bridgetkromhout) has a useful blog post on [running Docker in production](http://sysadvent.blogspot.co.uk/2014/12/day-1-docker-in-production-reality-not.html) at Dramafever.
+* There's also a best practices [blog post](http://developers.lyst.com/devops/2014/12/08/docker/) from Lyst.
+* [Building a Development Environment With Docker](https://tersesystems.com/2013/11/20/building-a-development-environment-with-docker/)
+* [Discourse in a Docker Container](https://samsaffron.com/archive/2013/11/07/discourse-in-a-docker-container)
+
+## Security
+
+This is where security tips about Docker go. The Docker [security](https://docs.docker.com/engine/security/security/) page goes into more detail.
+
+First things first: Docker runs as root. If you are in the `docker` group, you effectively [have root access](http://reventlov.com/advisories/using-the-docker-command-to-root-the-host). If you expose the docker unix socket to a container, you are giving the container [root access to the host](https://www.lvh.io/posts/dont-expose-the-docker-socket-not-even-to-a-container.html).
+
+Docker should not be your only defense. You should secure and harden it.
+
+For an understanding of what containers leave exposed, you should read [Understanding and Hardening Linux Containers](https://www.nccgroup.trust/globalassets/our-research/us/whitepapers/2016/april/ncc_group_understanding_hardening_linux_containers-1-1.pdf) by [Aaron Grattafiori](https://twitter.com/dyn___). This is a complete and comprehensive guide to the issues involved with containers, with a plethora of links and footnotes leading on to yet more useful content. The security tips that follow are useful if you've already hardened containers in the past, but they are not a substitute for understanding.
+
+### Security Tips
+
+For greatest security, you want to run Docker inside a virtual machine. This is straight from the Docker Security Team Lead -- [slides](http://www.slideshare.net/jpetazzo/linux-containers-lxc-docker-and-security) / [notes](http://www.projectatomic.io/blog/2014/08/is-it-safe-a-look-at-docker-and-security-from-linuxcon/). Then, run with AppArmor / seccomp / SELinux / grsec etc. to [limit the container permissions](http://linux-audit.com/docker-security-best-practices-for-your-vessel-and-containers/) (see the example below). See the [Docker 1.10 security features](https://blog.docker.com/2016/02/docker-engine-1-10-security/) for more details.
+
+Docker image ids are [sensitive information](https://medium.com/@quayio/your-docker-image-ids-are-secrets-and-its-time-you-treated-them-that-way-f55e9f14c1a4) and should not be exposed to the outside world. Treat them like passwords.
+
+See the [Docker Security Cheat Sheet](https://github.com/konstruktoid/Docker/blob/master/Security/CheatSheet.adoc) by [Thomas Sjögren](https://github.com/konstruktoid): some good stuff about container hardening in there.
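+
+One cheap way to limit container permissions, independent of which LSM you pick, is to drop Linux capabilities at run time; a minimal sketch (the image and container names are illustrative):
+
+```
+# drop everything, then add back only what the workload needs
+docker run -d --cap-drop ALL --cap-add NET_BIND_SERVICE --name web someimage
+
+# or apply a specific AppArmor profile rather than relying on defaults
+docker run --security-opt apparmor=docker-default someimage
+```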
+
+Check out the [docker bench security script](https://github.com/docker/docker-bench-security), download the [white papers](https://blog.docker.com/2015/05/understanding-docker-security-and-best-practices/) and subscribe to the [mailing lists](https://www.docker.com/docker-security) (unfortunately Docker does not have a security-specific mailing list, only dev / user).
+
+You should start off by using a kernel with unstable patches for grsecurity / pax compiled in, such as [Alpine Linux](https://en.wikipedia.org/wiki/Alpine_Linux). If you are using grsecurity in production, you should spring for [commercial support](https://grsecurity.net/business_support.php) for the [stable patches](https://grsecurity.net/announce.php), same as you would do for RedHat. It's $200 a month, which is nothing to your devops budget.
+
+Since Docker 1.11 you can easily limit the number of active processes running inside a container to prevent fork bombs. This requires a Linux kernel >= 4.3 with CGROUP_PIDS=y in the kernel configuration.
+
+```
+docker run --pids-limit=64
+```
+
+Also available since Docker 1.11 is the ability to prevent processes from gaining new privileges. This feature has been in the Linux kernel since version 3.5. You can read more about it in [this](http://www.projectatomic.io/blog/2016/03/no-new-privs-docker/) blog post.
+
+```
+docker run --security-opt=no-new-privileges
+```
+
+From the [Docker Security Cheat Sheet](http://container-solutions.com/content/uploads/2015/06/15.06.15_DockerCheatSheet_A2.pdf) (it's in PDF, which makes it hard to use, so copying below) by [Container Solutions](http://container-solutions.com/is-docker-safe-for-production/):
+
+Turn off interprocess communication with:
+
+```
+docker -d --icc=false --iptables
+```
+
+Set the container to be read-only:
+
+```
+docker run --read-only
+```
+
+Verify images with a hashsum:
+
+```
+docker pull debian@sha256:a25306f3850e1bd44541976aa7b5fd0a29be
+```
+
+Set volumes to be read-only:
+
+```
+docker run -v $(pwd)/secrets:/secrets:ro debian
+```
+
+Define and run a user in your Dockerfile so you don't run as root inside the container:
+
+```
+RUN groupadd -r user && useradd -r -g user user
+USER user
+```
+
+### User Namespaces
+
+There's also work on [user namespaces](https://s3hh.wordpress.com/2013/07/19/creating-and-using-containers-without-privilege/) -- support landed in 1.10 but is not enabled by default.
+
+To enable user namespaces ("remap the userns") in Ubuntu 15.10, [follow the blog example](https://raesene.github.io/blog/2016/02/04/Docker-User-Namespaces/).
+
+### Security Videos
+
+* [Using Docker Safely](https://youtu.be/04LOuMgNj9U)
+* [Securing your applications using Docker](https://youtu.be/KmxOXmPhZbk)
+* [Container security: Do containers actually contain?](https://youtu.be/a9lE9Urr6AQ)
+* [Linux Containers: Future or Fantasy?](https://www.youtube.com/watch?v=iN6QbszB1R8)
+
+### Security Roadmap
+
+The Docker roadmap talks about [seccomp support](https://github.com/docker/docker/blob/master/ROADMAP.md#11-security).
+There is an AppArmor policy generator called [bane](https://github.com/jfrazelle/bane), and they're working on [security profiles](https://github.com/docker/docker/issues/17142).
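+
+If your Docker version already ships seccomp support, you can apply a custom profile at run time; a minimal sketch, assuming you have a `profile.json` seccomp policy on hand (the path and image name are placeholders):
+
+```
+# run with a custom seccomp profile
+docker run --security-opt seccomp=profile.json someimage
+
+# or disable seccomp entirely -- useful only for debugging, not production
+docker run --security-opt seccomp=unconfined someimage
+```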
+
+## Tips
+
+Sources:
+
+* [15 Docker Tips in 5 minutes](http://sssslide.com/speakerdeck.com/bmorearty/15-docker-tips-in-5-minutes)
+* [CodeFresh Everyday Hacks Docker](https://codefresh.io/blog/everyday-hacks-docker/)
+
+### Prune
+
+The new [Data Management Commands](https://github.com/docker/docker/pull/26108) have landed as of Docker 1.13:
+
+* `docker system prune`
+* `docker volume prune`
+* `docker network prune`
+* `docker container prune`
+* `docker image prune`
+
+### df
+
+`docker system df` presents a summary of the space currently used by different docker objects.
+
+### Heredoc Docker Container
+
+```
+docker build -t htop - << EOF
+FROM alpine
+RUN apk --no-cache add htop
+EOF
+```
+
+### Last Ids
+
+```
+alias dl='docker ps -l -q'
+docker run ubuntu echo hello world
+docker commit $(dl) helloworld
+```
+
+### Commit with command (needs Dockerfile)
+
+```
+docker commit -run='{"Cmd":["postgres", "-too -many -opts"]}' $(dl) postgres
+```
+
+### Get IP address
+
+```
+docker inspect $(dl) | grep -wm1 IPAddress | cut -d '"' -f 4
+```
+
+or install [jq](https://stedolan.github.io/jq/):
+
+```
+docker inspect $(dl) | jq -r '.[0].NetworkSettings.IPAddress'
+```
+
+or using a [go template](https://docs.docker.com/engine/reference/commandline/inspect):
+
+```
+docker inspect -f '{{ .NetworkSettings.IPAddress }}' <container_name>
+```
+
+or, when building an image from a Dockerfile and you want to pass the host IP in as a build argument:
+
+```
+DOCKER_HOST_IP=`ifconfig | grep -E "([0-9]{1,3}\.){3}[0-9]{1,3}" | grep -v 127.0.0.1 | awk '{ print $2 }' | cut -f2 -d: | head -n1`
+echo DOCKER_HOST_IP = $DOCKER_HOST_IP
+docker build \
+  --build-arg ARTIFACTORY_ADDRESS=$DOCKER_HOST_IP \
+  -t sometag \
+  some-directory/
+```
+
+### Get port mapping
+
+```
+docker inspect -f '{{range $p, $conf := .NetworkSettings.Ports}} {{$p}} -> {{(index $conf 0).HostPort}} {{end}}' <containername>
+```
+
+### Find containers by regular expression
+
+```
+for i in $(docker ps -a | grep "REGEXP_PATTERN" | cut -f1 -d" "); do echo $i; done
+```
+
+### Get Environment Settings
+
+```
+docker run --rm ubuntu env
+```
+
+### Kill running containers
+
+```
+docker kill $(docker ps -q)
+```
+
+### Delete all containers (force!! removes running and stopped containers)
+
+```
+docker rm -f $(docker ps -qa)
+```
+
+### Delete old containers
+
+```
+docker ps -a | grep 'weeks ago' | awk '{print $1}' | xargs docker rm
+```
+
+### Delete stopped containers
+
+```
+docker rm -v $(docker ps -a -q -f status=exited)
+```
+
+### Delete containers after stopping
+
+```
+docker stop $(docker ps -aq) && docker rm -v $(docker ps -aq)
+```
+
+### Delete dangling images
+
+```
+docker rmi $(docker images -q -f dangling=true)
+```
+
+### Delete all images
+
+```
+docker rmi $(docker images -q)
+```
+
+### Delete dangling volumes
+
+As of Docker 1.9:
+
+```
+docker volume rm $(docker volume ls -q -f dangling=true)
+```
+
+In 1.9.0, the filter `dangling=false` does _not_ work -- it is ignored and will list all volumes.
+
+### Show image dependencies
+
+```
+docker images -viz | dot -Tpng -o docker.png
+```
+
+### Slimming down Docker containers
+
+- Cleaning APT in a RUN layer
+
+This should be done in the same layer as the other apt commands.
+Otherwise, the previous layers still persist the original information and your images will still be fat.
+
+```
+RUN {apt commands} \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+```
+
+- Flatten an image
+```
+ID=$(docker run -d image-name /bin/bash)
+docker export $ID | docker import - flat-image-name
+```
+
+- For backup
+```
+ID=$(docker run -d image-name /bin/bash)
+(docker export $ID | gzip -c > image.tgz)
+gzip -dc image.tgz | docker import - flat-image-name
+```
+
+### Monitor system resource utilization for running containers
+
+To check the CPU, memory, and network I/O usage of a single container, you can use:
+
+```
+docker stats <container>
+```
+
+For all containers listed by id:
+
+```
+docker stats $(docker ps -q)
+```
+
+For all containers listed by name:
+
+```
+docker stats $(docker ps --format '{{.Names}}')
+```
+
+For all containers listed by image:
+
+```
+docker ps -a -f ancestor=ubuntu
+```
+
+Remove all untagged images
+```
+docker rmi $(docker images | grep "^<none>" | awk '{split($0,a," "); print a[3]}')
+```
+
+Remove containers matching a regular expression
+```
+docker ps -a | grep wildfly | awk '{print $1}' | xargs docker rm -f
+```
+Remove all exited containers
+```
+docker rm -f $(docker ps -a | grep Exit | awk '{ print $1 }')
+```
+
+### Volumes can be files
+
+Be aware that you can mount files as volumes. For example, you can inject a configuration file like this:
+
+``` bash
+# copy file from container
+docker run --rm httpd cat /usr/local/apache2/conf/httpd.conf > httpd.conf
+
+# edit file
+vim httpd.conf
+
+# start container with modified configuration
+docker run --rm -ti -v "$PWD/httpd.conf:/usr/local/apache2/conf/httpd.conf:ro" -p "80:80" httpd
+```
diff --git a/notes/cheat-sheets/tmux-cheat-sheet.md b/notes/cheat-sheets/tmux-cheat-sheet.md
new file mode 100644
index 00000000..6738f584
--- /dev/null
+++ b/notes/cheat-sheets/tmux-cheat-sheet.md
@@ -0,0 +1,188 @@
+# tmux shortcuts & cheatsheet
+
+source: <https://gist.githubusercontent.com/MohamedAlaa/2961058/raw/ddf157a0d7b1674a2190a80e126f2e6aec54f369/tmux-cheatsheet.markdown>
+
+start new:
+
+    tmux
+
+start new with session name:
+
+    tmux new -s myname
+
+attach:
+
+    tmux a  # (or at, or attach)
+
+attach to named:
+
+    tmux a -t myname
+
+list sessions:
+
+    tmux ls
+
+<a name="killSessions"></a>kill session:
+
+    tmux kill-session -t myname
+
+<a name="killAllSessions"></a>Kill all the tmux sessions:
+
+    tmux ls | grep : | cut -d. -f1 | awk '{print substr($1, 0, length($1)-1)}' | xargs kill
+
+In tmux, hit the prefix `ctrl+b` (my modified prefix is ctrl+a) and then:
+
+## Sessions
+
+    :new<CR>  new session
+    s  list sessions
+    $  name session
+
+## <a name="WindowsTabs"></a>Windows (tabs)
+
+    c  create window
+    w  list windows
+    n  next window
+    p  previous window
+    f  find window
+    ,  name window
+    &  kill window
+
+## <a name="PanesSplits"></a>Panes (splits)
+
+    %  vertical split
+    "  horizontal split
+
+    o  swap panes
+    q  show pane numbers
+    x  kill pane
+    +  break pane into window (e.g. to select text by mouse to copy)
+    -  restore pane from window
+    ⍽  space - toggle between layouts
+    <prefix> q  (show pane numbers; when the numbers show up, type the key to go to that pane)
+    <prefix> {  (move the current pane left)
+    <prefix> }  (move the current pane right)
+    <prefix> z  toggle pane zoom
+
+## <a name="syncPanes"></a>Sync Panes
+
+You can do this by switching to the appropriate window, typing your tmux prefix (commonly Ctrl-B or Ctrl-A) and then a colon to bring up a tmux command line, and typing:
+
+```
+:setw synchronize-panes
+```
+
+You can optionally add on or off to specify which state you want; otherwise the option is simply toggled. This option is specific to one window, so it won’t change the way your other sessions or windows operate. When you’re done, toggle it off again by repeating the command. [tip source](http://blog.sanctum.geek.nz/sync-tmux-panes/)
+
+
+## Resizing Panes
+
+You can also resize panes if you don’t like the layout defaults. I personally rarely need to do this, though it’s handy to know how. Here is the basic syntax to resize panes:
+
+    PREFIX : resize-pane -D          (Resizes the current pane down)
+    PREFIX : resize-pane -U          (Resizes the current pane upward)
+    PREFIX : resize-pane -L          (Resizes the current pane left)
+    PREFIX : resize-pane -R          (Resizes the current pane right)
+    PREFIX : resize-pane -D 20       (Resizes the current pane down by 20 cells)
+    PREFIX : resize-pane -U 20       (Resizes the current pane upward by 20 cells)
+    PREFIX : resize-pane -L 20       (Resizes the current pane left by 20 cells)
+    PREFIX : resize-pane -R 20       (Resizes the current pane right by 20 cells)
+    PREFIX : resize-pane -t 2 20     (Resizes the pane with the id of 2 down by 20 cells)
+    PREFIX : resize-pane -t 2 -L 20  (Resizes the pane with the id of 2 left by 20 cells)
+
+
+## Copy mode:
+
+Pressing PREFIX [ places us in Copy mode. We can then use our movement keys to move our cursor around the screen. By default, the arrow keys work. We set our configuration file to use Vim keys for moving between windows and resizing panes so we wouldn’t have to take our hands off the home row. tmux has a vi mode for working with the buffer as well. To enable it, add this line to .tmux.conf:
+
+    setw -g mode-keys vi
+
+With this option set, we can use h, j, k, and l to move around our buffer.
+
+To get out of Copy mode, we just press the ENTER key. Moving around one character at a time isn’t very efficient. Since we enabled vi mode, we can also use vi-style shortcuts to move around the buffer.
+
+For example, we can use "w" to jump to the next word and "b" to jump back one word. And we can use "f", followed by any character, to jump to that character on the same line, and "F" to jump backwards on the line.
+
+    Function                vi            emacs
+    Back to indentation     ^             M-m
+    Clear selection         Escape        C-g
+    Copy selection          Enter         M-w
+    Cursor down             j             Down
+    Cursor left             h             Left
+    Cursor right            l             Right
+    Cursor to bottom line   L
+    Cursor to middle line   M             M-r
+    Cursor to top line      H             M-R
+    Cursor up               k             Up
+    Delete entire line      d             C-u
+    Delete to end of line   D             C-k
+    End of line             $             C-e
+    Goto line               :             g
+    Half page down          C-d           M-Down
+    Half page up            C-u           M-Up
+    Next page               C-f           Page down
+    Next word               w             M-f
+    Paste buffer            p             C-y
+    Previous page           C-b           Page up
+    Previous word           b             M-b
+    Quit mode               q             Escape
+    Scroll down             C-Down or J   C-Down
+    Scroll up               C-Up or K     C-Up
+    Search again            n             n
+    Search backward         ?             C-r
+    Search forward          /             C-s
+    Start of line           0             C-a
+    Start selection         Space         C-Space
+    Transpose chars                       C-t
+
+## Misc
+
+    d  detach
+    t  big clock
+    ?  list shortcuts
+    :  prompt
+
+## Configuration Options:
+
+    # Mouse support - set to on if you want to use the mouse
+    setw -g mode-mouse off
+    set -g mouse-select-pane off
+    set -g mouse-resize-pane off
+    set -g mouse-select-window off
+
+    # Set the default terminal mode to 256color mode
+    set -g default-terminal "screen-256color"
+
+    # enable activity alerts
+    setw -g monitor-activity on
+    set -g visual-activity on
+
+    # Center the window list
+    set -g status-justify centre
+
+    # Maximize and restore a pane
+    unbind Up
+    bind Up new-window -d -n tmp \; swap-pane -s tmp.1 \; select-window -t tmp
+    unbind Down
+    bind Down last-window \; swap-pane -s tmp.1 \; kill-window -t tmp
+
+## Resources:
+
+* [tmux: Productive Mouse-Free Development](http://pragprog.com/book/bhtmux/tmux)
+* [How to reorder windows](http://superuser.com/questions/343572/tmux-how-do-i-reorder-my-windows)
+
+## Changelog:
+
+* 1411143833002 - Added [toggle zoom](#PanesSplits) under Panes (splits) section.
+* 1411143833002 - [Added Sync Panes](#syncPanes)
+* 1414276652677 - [Added Kill all tmux sessions](#killAllSessions)
+* 1438585211173 - [Corrected create, and added next and previous, thanks to @justinjhendrick](#WindowsTabs)
+
+## Request an Update:
+
+We noticed that our cheatsheet is growing and people are collaborating to add new tips and tricks, so please tweet me what you would like to add and let's make it better!
+
+* Twitter: [@MohammedAlaa](http://twitter.com/MohammedAlaa)
diff --git a/notes/frameworks/darknet.md b/notes/frameworks/darknet.md
new file mode 100644
index 00000000..c2d73294
--- /dev/null
+++ b/notes/frameworks/darknet.md
@@ -0,0 +1,13 @@
+# Darknet
+
+Installation
+
+- `git clone https://github.com/AlexeyAB/darknet darknet_ab`
+- if `make` errors:
+  - `export LD_LIBRARY_PATH=/usr/local/cuda-9.0/lib64:$LD_LIBRARY_PATH`
+  - `export PATH=/usr/local/cuda-9.0/bin:$PATH`
+  - `make clean && make` (then smoke-test the build as shown below)
+
+# YOLO Python GPU interface
+
+- `git clone https://github.com/madhawav/YOLO3-4-Py` and follow its GPU setup instructions
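+
+After a successful build, a quick way to smoke-test the binary is the standard darknet detector invocation (the cfg/weights/image paths below are the usual YOLOv3 examples and assume you have downloaded the weights yourself):
+
+```
+# run a single-image detection to verify the GPU build works
+./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights data/dog.jpg
+```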
\ No newline at end of file
diff --git a/notes/frameworks/docker.md b/notes/frameworks/docker.md
new file mode 100644
index 00000000..af703c0e
--- /dev/null
+++ b/notes/frameworks/docker.md
@@ -0,0 +1,28 @@
+# Docker
+
+#### Useful Docker commands
+
+- list running containers: `docker ps`
+- log in to a running container as root: `docker exec -ti -u root container_name bash`
+- reload the daemon config: `sudo systemctl daemon-reload`
+- restart docker: `sudo systemctl restart docker`
+
+#### Change Image Storage Location
+
+- Keeping many docker images can consume several hundred GB of storage, so it's often useful to move it off your startup disk
+- Edit `sudo nano /etc/docker/daemon.json`
+- Add
+```
+{
+  "data-root": "/path/to/new/docker"
+}
+```
+- stop docker: `sudo systemctl stop docker`
+- check docker has stopped: `ps aux | grep -i docker | grep -v grep`
+- copy the data to the new location: `sudo rsync -axPS /var/lib/docker/ /path/to/new/docker`, e.g. `sudo rsync -axPS /var/lib/docker/ /media/ubuntu/disk_name/data_store/docker_images`
+- start docker again: `sudo systemctl start docker`
+
+#### Permissions
+
+- permissions still not solved, but here are useful tips: <http://www.carlboettiger.info/2014/10/21/docker-and-user-permissions-crazyness.html>; see also the sketch below
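+
+A common workaround for files written into bind mounts coming out root-owned is to run the container as your own uid/gid; a minimal sketch (`someimage` is a placeholder):
+
+```
+# run the container as the calling user so files created in /data
+# are owned by you on the host, not by root
+docker run --rm -v "$PWD:/data" --user "$(id -u):$(id -g)" someimage
+```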
\ No newline at end of file
diff --git a/notes/frameworks/nvidia.md b/notes/frameworks/nvidia.md
new file mode 100644
index 00000000..b54aed8a
--- /dev/null
+++ b/notes/frameworks/nvidia.md
@@ -0,0 +1,22 @@
+# NVIDIA
+
+During installations you may need to update your paths when using multiple versions of CUDA:
+
+```
+export CUDA_HOME=/usr/local/cuda
+export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
+export PATH=$CUDA_HOME/bin:$PATH
+```
+
+### Install CUDA versions
+
+Example installing the older CUDA 9.0:
+
+- go to <https://developer.nvidia.com/cuda-90-download-archive>
+- download the .deb for your Ubuntu version
+- `sudo dpkg -i cuda-repo-ubuntu1604_9.0.176-1_amd64.deb`
+- `sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub`
+- `sudo apt-get update`
+- `sudo apt-get install cuda-9-0`
+- `sudo rm /usr/local/cuda` to remove the previous symlink
+- `sudo ln -s /usr/local/cuda-9.0 /usr/local/cuda` (then verify, as shown below)
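+
+To confirm which CUDA toolkit the symlink now points at, check the compiler version (assuming `/usr/local/cuda/bin` is on your PATH as above):
+
+```
+# should report release 9.0 after the symlink switch
+nvcc --version
+```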
\ No newline at end of file
diff --git a/notes/frameworks/progress_gan.md b/notes/frameworks/progress_gan.md
new file mode 100644
index 00000000..580052a3
--- /dev/null
+++ b/notes/frameworks/progress_gan.md
@@ -0,0 +1,15 @@
+# Progressive GAN
+
+Progressive Growing of GANs for Improved Quality, Stability, and Variation<br><i>– Official TensorFlow implementation of the ICLR 2018 paper</i>
+
+### Start new training
+
+- edit the dataset path in `config.py`, e.g.:
+  `desc += '-my_name'; dataset = EasyDict(tfrecord_dir='tf_dir_name'); train.mirror_augment = True;`
+- `python train.py`
+
+### Resume Training
+
+- `desc += '-my_name'; dataset = EasyDict(tfrecord_dir='512x512'); train.mirror_augment = True; train.resume_kimg = 5587.8; train.resume_run_id = 11;`
+- to get `kimg`, `cat 011_yourproject/log.txt` and find the latest kimg number
diff --git a/notes/utils/image_utils.md b/notes/utils/image_utils.md
new file mode 100644
index 00000000..25dc200e
--- /dev/null
+++ b/notes/utils/image_utils.md
@@ -0,0 +1,302 @@
+Collection of utility scripts for video, image, and network tasks
+
+-------------------------------
+
+# Video Utilities
+
+## FFMPEG
+
+resize video
+
+`ffmpeg -i input.mov -filter:v scale=720:-1 -c:a copy out.mov`
+
+export resized still frames
+
+`ffmpeg -i input.mov -s 320x180 -qscale:v 2 to/frame_%07d.jpg`
+
+export one frame per second
+
+`ffmpeg -i input.flv -vf fps=1 out%d.png`
+
+export one frame every n seconds (here n = 60)
+
+`ffmpeg -i myvideo.avi -vf fps=1/60 img%03d.jpg`
+
+shorten video
+
+`ffmpeg -ss [start] -i in.mp4 -t [duration] -c copy out.mp4`
+
+shorten video example: 5 second clip starting at 5 seconds
+
+`ffmpeg -ss 00:00:05 -i file.mp4 -t 00:00:05 -c copy file_5s.mp4`
+
+render video from still frames:
+
+`ffmpeg -r 1/5 -i frame_%03d.png -c:v libx264 -vf fps=25 -pix_fmt yuv420p out.mp4`
+
+using glob:
+
+`ffmpeg -framerate 1 -pattern_type glob -i '*.png' -c:v libx264 -r 30 -pix_fmt yuv420p out.mp4`
+
+or
+
+`ffmpeg -i frame_%03d.png -c:v ffv1 -qscale:v 0 test.avi`
+
+concatenate videos
+
+`ffmpeg -f concat -i inputs.txt -vcodec copy -acodec copy Mux1.mp4`
+
+-------------------------------
+
+# Image Utilities
+
+## Cropping, resizing, converting formats
+
+Resize an image to a maximum width and height using imagemagick
+
+`mogrify -resize 800x600 in.jpg`
+
+Resize and crop center
+
+`convert original.jpg -resize 200x200^ -gravity Center -crop 200x200+0+0 +repage cropped.png`
+
+Crop an image using imagemagick with WxH+X+Y
+
+`mogrify -crop 640x480+50+100 foo.png`
+
+Convert all files with matching filename to .jpg
+
+- `find . -name "*.gif" -print0 | xargs -0 mogrify -format jpg`
+
+Resize all files with matching name to new size
+
+- `find . -name "*.jpg" -print0 | xargs -0 mogrify -resize 720`
+
+Optimize all images in subdirectories for web / s3:
+
+- `find . -name "*.jpg" -print0 | xargs -0 mogrify -sampling-factor 4:2:0 -strip -quality 85 -interlace JPEG -colorspace RGB`
+
+Verify images are not corrupt
+
+- `identify "./myfolder/*" >log.txt 2>&1`
+
+Resize all images in subdirectories such that the smallest dimension is 256
+
+- `find . -name "*.JPEG" | xargs -I {} convert {} -resize "256^>" {}`
+
+Crop animated GIF
+
+- `convert input.gif -coalesce -repage 0x0 -crop WxH+X+Y +repage output.gif`
+
+Read the color of the top-left pixel (used below to filter images by corner color):
+
+`-format "%[pixel: u.p{0,0}]" info:`
+
+Batch a folder of images into squares, for pix2pix
+
+```
+W=64
+H=64
+INDIR=mydir/orig
+OUTDIR=mydir/images
+
+for f in $INDIR/*.jpg;
+do
+  fn=$(basename "$f")
+  convert $f -resize ${W}x${H}^ -gravity Center -crop $W'x'$H'+0+0' +repage $OUTDIR/$fn
+done
+```
+
+## Animate GIFs
+
+With ImageMagick
+
+`convert -delay 100 -loop 0 image*.jpg animation.gif`
+
+With ImageMagick, higher quality
+
+`convert -background white -alpha remove -layers OptimizePlus -delay 1 *.png -loop 0 anim.gif`
+
+With [Gifsicle](https://www.lcdf.org/gifsicle/)
+
+`gifsicle --delay=10 --loop *.gif > anim.gif`
+
+-------------------------------
+
+# File I/O Utilities
+
+## Cleaning Directories
+
+Remove all of a specific file type
+
+`find . -name "*.gif" -print0 | xargs -0 rm`
+
+Find (then remove) invalid images
+
+`find . -name \*.png -exec identify -ping {} \; -or -exec echo {} \;`
+`find . -name \*.png -exec identify -ping {} \; -or -exec rm -f {} \;`
+
+Rename a directory of files incrementally
+
+`j=1;for i in *.png; do mv "$i" ILZMi_MGTDg_q2_"$j".png; let j=j+1;done`
+
+Count all files in subdirectories
+
+`find . -type f | wc -l`
+
+Remove all files listed in an input file
+
+`xargs rm < 1.txt`
+
+Count files per subdirectory
+
+`find . -maxdepth 1 -mindepth 1 -type d -exec sh -c 'echo "{} : $(find "{}" -type f | wc -l)" file\(s\)' \;`
+
+Filter out images with white pixels in the upper corner
+
+```
+for f in *;do convert $f -format "%[pixel: u.p{0,0}]" info:|xargs echo $f |grep white|cut -d ' ' -f 1|xargs rm -f;done
+```
+
+Copy all matching files in subdirectories
+
+`find . -name \*.jpg -exec cp {} ../test \;`
+
+Clean directories after a google images python scrape
+```
+find -name "*thumb-90x90.jpg" | xargs rm -f
+find . -name "*.jpg" | grep -v 'clean' | xargs rm -f
+find . -name "*.jpeg" | grep -v 'clean' | xargs rm -f
+```
+
+Center-crop square images for training
+
+```
+convert -define jpeg:size=200x200 original.jpeg -thumbnail 100x100^ -gravity center -extent 100x100 thumbnail.jpeg
+```
+
+Resize all images to square crops for Caffe
+```
+find . -name "*.jpg" -print0 | xargs -0 mogrify -resize 256x256^ -gravity Center -crop 256x256+0+0 +repage
+```
+
+Replace spaces in filenames
+
+`find . -depth -name "* *" -execdir rename 's/ /_/g' "{}" \;`
+
+Rename all files to lowercase
+
+`find . -depth -exec rename 's/(.*)\/([^\/]*)/$1\/\L$2/' {} \;`
+
+**Delete images smaller than a specified size (one option, using GNU find; run with `-print` instead of `-delete` first to verify the list)**
+
+`find . -name "*.jpg" -type f -size -10k -delete`
+
+-------------------------------------------
+
+## Example Script for Cleaning ImageNet Downloads
+
+```
+for f in *;do mogrify -resize 640x640! $f;done
+for f in *;do mogrify -crop 500x500+70+70 $f;done
+# filter out images with solid color backgrounds, checking each corner pixel
+for f in *;do convert $f -format "%[pixel: u.p{0,0}]" info:|xargs echo $f |grep -E 'white|black|red|blue|green|yellow|purple|none|transparent|WhiteSmoke|gray99|gray98' | cut -d ' ' -f 1|xargs rm -f;done
+for f in *;do convert $f -format "%[pixel: u.p{499,499}]" info:|xargs echo $f |grep -E 'white|black|red|blue|green|yellow|purple|none|transparent|WhiteSmoke|gray99|gray98' | cut -d ' ' -f 1|xargs rm -f;done
+for f in *;do convert $f -format "%[pixel: u.p{0,499}]" info:|xargs echo $f |grep -E 'white|black|red|blue|green|yellow|purple|none|transparent|WhiteSmoke|gray99|gray98' | cut -d ' ' -f 1|xargs rm -f;done
+for f in *;do convert $f -format "%[pixel: u.p{499,0}]" info:|xargs echo $f |grep -E 'white|black|red|blue|green|yellow|purple|none|transparent|WhiteSmoke|gray99|gray98' | cut -d ' ' -f 1|xargs rm -f;done
+# Remove files that are b&w
+for f in *;do convert $f -colorspace HSL -channel g -separate +channel -format "%[fx:mean]" info: | xargs echo $f | grep -v "0.[0-9]*$" | cut -d ' ' -f 1 | xargs rm -f;done
+# Remove files that are close to b&w
+for f in *;do convert $f -colorspace HSL -channel g -separate +channel -format "%[fx:mean]" info: | xargs echo $f | grep "0.0[0-9]*$" | cut -d ' ' -f 1 | xargs rm -f;done
+# make a backup dir at full size (512) first, then downsample and filter
+for f in *;do mogrify -resize 256x256! -median 5x5 -quality 100 $f;done
+for f in *;do mogrify -median 4x4 -quality 100 $f;done
+for f in *;do mogrify -blur 0x6 -quality 100 $f;done
+# create AB train,val,test splits
+# process A with blur filter
+cd
+```
+
+---------------------------------------
+
+# Network Utilities
+
+## Setup Vhosts
+
+```
+sudo mkdir -p /var/www/api.dulldream.xyz/public_html
+sudo chown -R $USER:$USER /var/www/api.dulldream.xyz/public_html
+sudo cp /etc/apache2/sites-available/api.dulldream.conf /etc/apache2/sites-available/api.dulldream.xyz.conf
+sudo nano /etc/apache2/sites-available/api.dulldream.xyz.conf
+```
+
+Example vhost configuration (substitute your own domain):
+
+```
+<VirtualHost *:80>
+    ServerAdmin admin@example.com
+    ServerName imagenet.com
+    ServerAlias www.imagenet.com
+    DocumentRoot /var/www/imagenet.com/public_html
+    ErrorLog ${APACHE_LOG_DIR}/error.log
+    CustomLog ${APACHE_LOG_DIR}/access.log combined
+</VirtualHost>
+```
+
+Then enable the site:
+
+```
+sudo a2ensite imagenet.com.conf
+```
+
+## Local Server
+
+PHP
+
+`cd ~/public_html; php -S localhost:8000`
+
+Python 2
+
+`python -m SimpleHTTPServer 8000`
+
+Python 3
+
+`python3 -m http.server 8000`
+
+Emulate mod_rewrite with the built-in PHP server by using a router script (save as `router.php` and start with `php -S localhost:8000 router.php`):
+
+```
+if (file_exists(__DIR__ . '/' . $_SERVER['REQUEST_URI'])) {
+    return false; // serve the requested resource as-is
+} else {
+    include_once 'index.php';
+}
+```
+
+# System Utilities
+
+configure locales (for the timezone itself, use `sudo dpkg-reconfigure tzdata`)
+
+```
+export LC_ALL="en_US.UTF-8"
+export LC_CTYPE="en_US.UTF-8"
+sudo dpkg-reconfigure locales
+```
+
+-------------------------------------------
+
+## Image Annotation
+
+*BBTag: image annotation tool*
+
+This launches the BBTag annotation app:
+
+`/root/BBTag/BBTag.sh`
+
+Process folders labeled with BBTag -- this converts a directory of images into cropped/resized images for Caffe training:
+
+`Python/convert_bb_tag_xml.py -x input.xml -s source/images -o output/images`
\ No newline at end of file
