new nbs

author: adamhrv <adam@ahprojects.com> 2018-12-13 14:33:05 +0100
committer: adamhrv <adam@ahprojects.com> 2018-12-13 14:33:05 +0100
commit: 49a49bebe3f972e93add837180f5672a4ae62ce0 (patch)
tree: 03175a840591518998e4e8ecd92d64e599ef4eb0 /megapixels/notebooks/datasets/test_pd_funcs.ipynb
parent: 7891e9d0dc9adcb68749f0e8049c0c8901b4f288 (diff)
1 files changed, 582 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/test_pd_funcs.ipynb b/megapixels/notebooks/datasets/test_pd_funcs.ipynb
new file mode 100644
index 00000000..62c2b0db
--- /dev/null
+++ b/megapixels/notebooks/datasets/test_pd_funcs.ipynb
@@ -0,0 +1,582 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Append UUID to SHA256 CSV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import os\n",
+    "from os.path import join\n",
+    "from glob import glob\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "import sys\n",
+    "sys.path.append('/work/megapixels_dev/megapixels/')\n",
+    "from app.utils import file_utils"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Root of the data store; set the DATA_STORE environment variable to override the default\n",
+    "DATA_STORE = os.environ.get('DATA_STORE', '/data_store_nas/')\n",
+    "# LFW dataset directory and its metadata subdirectory\n",
+    "dir_dataset = join(DATA_STORE, 'datasets/people/lfw/')\n",
+    "dir_metadata = join(dir_dataset, 'metadata')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# CSV metadata files, all located directly under dir_metadata\n",
+    "fp_files, fp_rois, fp_identities, fp_pose = (\n",
+    "  join(dir_metadata, fn_csv)\n",
+    "  for fn_csv in ('image_files.csv', 'image_rois.csv', 'identities.csv', 'image_pose.csv')\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 129,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# every CSV has an 'index' column; use it as the DataFrame index while reading\n",
+    "df_rois = pd.read_csv(fp_rois, index_col='index')\n",
+    "df_files = pd.read_csv(fp_files, index_col='index')\n",
+    "df_identities = pd.read_csv(fp_identities, index_col='index')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>h</th>\n",
+       "      <th>image_height</th>\n",
+       "      <th>image_index</th>\n",
+       "      <th>image_width</th>\n",
+       "      <th>w</th>\n",
+       "      <th>x</th>\n",
+       "      <th>y</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>index</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.330000</td>\n",
+       "      <td>250</td>\n",
+       "      <td>0</td>\n",
+       "      <td>250</td>\n",
+       "      <td>0.330000</td>\n",
+       "      <td>0.336667</td>\n",
+       "      <td>0.350000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.393333</td>\n",
+       "      <td>250</td>\n",
+       "      <td>1</td>\n",
+       "      <td>250</td>\n",
+       "      <td>0.393333</td>\n",
+       "      <td>0.286667</td>\n",
+       "      <td>0.313333</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              h  image_height  image_index  image_width         w         x  \\\n",
+       "index                                                                         \n",
+       "0      0.330000           250            0          250  0.330000  0.336667   \n",
+       "1      0.393333           250            1          250  0.393333  0.286667   \n",
+       "\n",
+       "              y  \n",
+       "index            \n",
+       "0      0.350000  \n",
+       "1      0.313333  "
+      ]
+     },
+     "execution_count": 130,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_rois.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ext</th>\n",
+       "      <th>fn</th>\n",
+       "      <th>subdir</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>index</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>jpg</td>\n",
+       "      <td>AJ_Cook_0001</td>\n",
+       "      <td>AJ_Cook</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>jpg</td>\n",
+       "      <td>AJ_Lamas_0001</td>\n",
+       "      <td>AJ_Lamas</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       ext             fn    subdir\n",
+       "index                              \n",
+       "0      jpg   AJ_Cook_0001   AJ_Cook\n",
+       "1      jpg  AJ_Lamas_0001  AJ_Lamas"
+      ]
+     },
+     "execution_count": 131,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_files.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>description</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>images</th>\n",
+       "      <th>image_index</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>index</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A. J. Cook</td>\n",
+       "      <td>Canadian actress</td>\n",
+       "      <td>f</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AJ Lamas</td>\n",
+       "      <td>American actor</td>\n",
+       "      <td>m</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             name       description gender  images  image_index\n",
+       "index                                                          \n",
+       "0      A. J. Cook  Canadian actress      f       1            0\n",
+       "1        AJ Lamas    American actor      m       1            1"
+      ]
+     },
+     "execution_count": 132,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_identities.head(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "5749"
+      ]
+     },
+     "execution_count": 133,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(df_identities)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "13233"
+      ]
+     },
+     "execution_count": 134,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(df_files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "indices_included = list(df_rois['image_index'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# derive the full range of image indices from the file table instead of hard-coding its length\n",
+    "indices_all = list(range(len(df_files)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{6072, 1556, 11207}\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(set(indices_all) - set(indices_included))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from tqdm import tqdm_notebook as tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_roi_groups = df_rois.groupby('image_index')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 161,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "df3b603ad0464cb6983862fbf6716d02",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=13230), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AJ_Cook_0001\n",
+      "0.33\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# peek at the first ROI group only: print its file name and the width of its first ROI\n",
+    "for df_roi_group_idx, df_roi_group in tqdm(df_roi_groups):\n",
+    "  # the groupby key is the image_index value shared by every row of the group\n",
+    "  image_index = df_roi_group_idx\n",
+    "  pds_file = df_files.iloc[image_index]\n",
+    "  print(pds_file['fn'])\n",
+    "  w = df_roi_group['w'].iloc[0]\n",
+    "  print(w)\n",
+    "  break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from app.models.bbox import BBox"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import operator\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dim = (250, 250)\n",
+    "# four test boxes that differ only in their (equal) third and fourth arguments\n",
+    "bboxes = [BBox.from_xywh_dim(10, 10, side, side, dim) for side in (40, 20, 200, 30)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6399999999999999\n",
+      "0.0256\n",
+      "0.0144\n",
+      "0.006399999999999998\n",
+      "\n",
+      "0.6399999999999999\n",
+      "0.0256\n",
+      "0.0144\n",
+      "0.006399999999999998\n",
+      "max area is: 0.6399999999999999\n"
+     ]
+    }
+   ],
+   "source": [
+    "# areas before sorting\n",
+    "for b in bboxes:\n",
+    "  print(b.area)\n",
+    "# list.sort() sorts in place and returns None, so its result must not be subscripted\n",
+    "bboxes.sort(key=operator.attrgetter('area'), reverse=True)\n",
+    "print('')\n",
+    "# areas after sorting, largest first\n",
+    "for b in bboxes:\n",
+    "  print(b.area)\n",
+    "\n",
+    "print('max area is: {}'.format(bboxes[0].area))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bboxes.sort(key = operator.attrgetter('area'), reverse=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[<app.models.bbox.BBox at 0x7ff360047978>,\n",
+       " <app.models.bbox.BBox at 0x7ff3600479b0>,\n",
+       " <app.models.bbox.BBox at 0x7ff3600479e8>,\n",
+       " <app.models.bbox.BBox at 0x7ff360047a20>]"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "bboxes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:megapixels]",
+   "language": "python",
+   "name": "conda-env-megapixels-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
author	adamhrv <adam@ahprojects.com>	2018-12-13 14:33:05 +0100
committer	adamhrv <adam@ahprojects.com>	2018-12-13 14:33:05 +0100
commit	49a49bebe3f972e93add837180f5672a4ae62ce0 (patch)
tree	03175a840591518998e4e8ecd92d64e599ef4eb0 /megapixels/notebooks/datasets/test_pd_funcs.ipynb
parent	7891e9d0dc9adcb68749f0e8049c0c8901b4f288 (diff)