{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Append UUID to SHA256 CSV" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "import os\n", "from os.path import join\n", "from glob import glob\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "import pandas as pd\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "import sys\n", "sys.path.append('/work/megapixels_dev/megapixels/')\n", "from app.utils import file_utils" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Root of the shared data store; every dataset path below is derived from it.\n", "DATA_STORE = '/data_store_nas/'\n", "dir_dataset = join(DATA_STORE, 'datasets/people/lfw/')\n", "dir_metadata = join(dir_dataset, 'metadata')" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "# Metadata CSV paths, unpacked in the same order as the file names listed below.\n", "fp_files, fp_rois, fp_identities, fp_pose = (\n", "    join(dir_metadata, name)\n", "    for name in ('image_files.csv', 'image_rois.csv', 'identities.csv', 'image_pose.csv')\n", ")" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [], "source": [ "# Load each table and key it by its 'index' column (read order: rois, files, identities).\n", "df_rois, df_files, df_identities = (\n", "    pd.read_csv(fp).set_index('index')\n", "    for fp in (fp_rois, fp_files, fp_identities)\n", ")" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
himage_heightimage_indeximage_widthwxy
index
00.33000025002500.3300000.3366670.350000
10.39333325012500.3933330.2866670.313333
\n", "
" ], "text/plain": [ " h image_height image_index image_width w x \\\n", "index \n", "0 0.330000 250 0 250 0.330000 0.336667 \n", "1 0.393333 250 1 250 0.393333 0.286667 \n", "\n", " y \n", "index \n", "0 0.350000 \n", "1 0.313333 " ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_rois.head(2)" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
extfnsubdir
index
0jpgAJ_Cook_0001AJ_Cook
1jpgAJ_Lamas_0001AJ_Lamas
\n", "
" ], "text/plain": [ " ext fn subdir\n", "index \n", "0 jpg AJ_Cook_0001 AJ_Cook\n", "1 jpg AJ_Lamas_0001 AJ_Lamas" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_files.head(2)" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namedescriptiongenderimagesimage_index
index
0A. J. CookCanadian actressf10
1AJ LamasAmerican actorm11
\n", "
" ], "text/plain": [ " name description gender images image_index\n", "index \n", "0 A. J. Cook Canadian actress f 1 0\n", "1 AJ Lamas American actor m 1 1" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_identities.head(2)" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5749" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_identities)" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "13233" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_files)" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [], "source": [ "indices_included = list(df_rois['image_index'])" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [], "source": [ "indices_all = list(range(13233))" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{6072, 1556, 11207}\n" ] } ], "source": [ "print(set(indices_all) - set(indices_included))" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [], "source": [ "from tqdm import tqdm_notebook as tqdm" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "df_roi_groups = df_rois.groupby('image_index')" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "df3b603ad0464cb6983862fbf6716d02", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=13230), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "AJ_Cook_0001\n", "0.33\n", "\n" ] } ], "source": [ "for 
df_roi_group_idx, df_roi_group in tqdm(df_roi_groups):\n", "    # Every row in a group shares the same image_index, so the first row is representative.\n", "    image_index = df_roi_group.image_index.values[0]\n", "    # NOTE(review): .iloc is positional; this assumes df_files rows are ordered by their 'index' label - confirm.\n", "    pds_file = df_files.iloc[image_index]\n", "    print(pds_file.fn)\n", "    w = df_roi_group.w.values[0]\n", "    print(w)\n", "    # Smoke test only: inspect the first group, then stop.\n", "    break" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "from app.models.bbox import BBox" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "import operator\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "# Sample boxes of varying size for exercising the area sort\n", "# (dim is presumably the image size - confirm against BBox.from_xywh_dim).\n", "bboxes = []\n", "dim = (250, 250)\n", "bboxes.append(BBox.from_xywh_dim(10, 10, 40, 40, dim))\n", "bboxes.append(BBox.from_xywh_dim(10, 10, 20, 20, dim))\n", "bboxes.append(BBox.from_xywh_dim(10, 10, 200, 200, dim))\n", "bboxes.append(BBox.from_xywh_dim(10, 10, 30, 30, dim))" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.6399999999999999\n", "0.0256\n", "0.0144\n", "0.006399999999999998\n", "\n", "0.6399999999999999\n", "0.0256\n", "0.0144\n", "0.006399999999999998\n", "max area is: 0.6399999999999999\n" ] } ], "source": [ "for b in bboxes:\n", "    print(b.area)\n", "# list.sort() sorts in place and returns None, so its result must not be subscripted.\n", "bboxes.sort(key=operator.attrgetter('area'), reverse=True)\n", "print('')\n", "for b in bboxes:\n", "    print(b.area)\n", "\n", "# After the descending sort the largest box is first.\n", "print('max area is: {}'.format(bboxes[0].area))" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [], "source": [ "bboxes.sort(key = operator.attrgetter('area'), reverse=True)\n" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[,\n", " ,\n", " ,\n", " ]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bboxes" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:megapixels]", "language": "python", "name": "conda-env-megapixels-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }