summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/test_pd_funcs.ipynb
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2018-12-13 14:33:05 +0100
committeradamhrv <adam@ahprojects.com>2018-12-13 14:33:05 +0100
commit49a49bebe3f972e93add837180f5672a4ae62ce0 (patch)
tree03175a840591518998e4e8ecd92d64e599ef4eb0 /megapixels/notebooks/datasets/test_pd_funcs.ipynb
parent7891e9d0dc9adcb68749f0e8049c0c8901b4f288 (diff)
new nbs
Diffstat (limited to 'megapixels/notebooks/datasets/test_pd_funcs.ipynb')
-rw-r--r--megapixels/notebooks/datasets/test_pd_funcs.ipynb582
1 files changed, 582 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/test_pd_funcs.ipynb b/megapixels/notebooks/datasets/test_pd_funcs.ipynb
new file mode 100644
index 00000000..62c2b0db
--- /dev/null
+++ b/megapixels/notebooks/datasets/test_pd_funcs.ipynb
@@ -0,0 +1,582 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Append UUID to SHA256 CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import os\n",
+ "from os.path import join\n",
+ "from glob import glob\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import sys\n",
+ "sys.path.append('/work/megapixels_dev/megapixels/')\n",
+ "from app.utils import file_utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "DATA_STORE = '/data_store_nas/'\n",
+ "dir_dataset = join(DATA_STORE, 'datasets/people/lfw/')\n",
+ "dir_metadata = join(dir_dataset, 'metadata')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Metadata CSV locations, all resolved under dir_metadata\n",
+ "fp_files, fp_rois, fp_identities, fp_pose = (\n",
+ "    join(dir_metadata, fn_csv)\n",
+ "    for fn_csv in ('image_files.csv', 'image_rois.csv', 'identities.csv', 'image_pose.csv')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load each metadata table, using its 'index' column as the row index\n",
+ "df_rois = pd.read_csv(fp_rois, index_col='index')\n",
+ "df_files = pd.read_csv(fp_files, index_col='index')\n",
+ "df_identities = pd.read_csv(fp_identities, index_col='index')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 130,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>h</th>\n",
+ " <th>image_height</th>\n",
+ " <th>image_index</th>\n",
+ " <th>image_width</th>\n",
+ " <th>w</th>\n",
+ " <th>x</th>\n",
+ " <th>y</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>0.330000</td>\n",
+ " <td>250</td>\n",
+ " <td>0</td>\n",
+ " <td>250</td>\n",
+ " <td>0.330000</td>\n",
+ " <td>0.336667</td>\n",
+ " <td>0.350000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>0.393333</td>\n",
+ " <td>250</td>\n",
+ " <td>1</td>\n",
+ " <td>250</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>0.286667</td>\n",
+ " <td>0.313333</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " h image_height image_index image_width w x \\\n",
+ "index \n",
+ "0 0.330000 250 0 250 0.330000 0.336667 \n",
+ "1 0.393333 250 1 250 0.393333 0.286667 \n",
+ "\n",
+ " y \n",
+ "index \n",
+ "0 0.350000 \n",
+ "1 0.313333 "
+ ]
+ },
+ "execution_count": 130,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_rois.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 131,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>AJ_Cook_0001</td>\n",
+ " <td>AJ_Cook</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>AJ_Lamas_0001</td>\n",
+ " <td>AJ_Lamas</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn subdir\n",
+ "index \n",
+ "0 jpg AJ_Cook_0001 AJ_Cook\n",
+ "1 jpg AJ_Lamas_0001 AJ_Lamas"
+ ]
+ },
+ "execution_count": 131,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_files.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>image_index</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>A. J. Cook</td>\n",
+ " <td>Canadian actress</td>\n",
+ " <td>f</td>\n",
+ " <td>1</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>American actor</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name description gender images image_index\n",
+ "index \n",
+ "0 A. J. Cook Canadian actress f 1 0\n",
+ "1 AJ Lamas American actor m 1 1"
+ ]
+ },
+ "execution_count": 132,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_identities.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 133,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5749"
+ ]
+ },
+ "execution_count": 133,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(df_identities)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13233"
+ ]
+ },
+ "execution_count": 134,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(df_files)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "indices_included = list(df_rois['image_index'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Every image position in df_files; derived from the frame instead of a\n",
+ "# hard-coded row count so it stays correct if image_files.csv changes\n",
+ "indices_all = list(range(len(df_files)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{6072, 1556, 11207}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Image indices that appear in indices_all but have no row in df_rois\n",
+ "print(set(indices_all).difference(set(indices_included)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from tqdm import tqdm_notebook as tqdm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_roi_groups = df_rois.groupby('image_index')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "df3b603ad0464cb6983862fbf6716d02",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=13230), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AJ_Cook_0001\n",
+ "0.33\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Peek at the first ROI group only: print its file name and first ROI width\n",
+ "for image_index, roi_rows in tqdm(df_roi_groups):\n",
+ "    # the groupby key is the image_index value shared by every row in the group\n",
+ "    file_row = df_files.iloc[image_index]\n",
+ "    print(file_row.fn)\n",
+ "    print(roi_rows.w.values[0])\n",
+ "    break"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from app.models.bbox import BBox"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import operator\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Four square test boxes anchored at (10, 10) inside a 250x250 image\n",
+ "dim = (250, 250)\n",
+ "bboxes = [BBox.from_xywh_dim(10, 10, side, side, dim) for side in (40, 20, 200, 30)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.6399999999999999\n",
+ "0.0256\n",
+ "0.0144\n",
+ "0.006399999999999998\n",
+ "\n",
+ "0.6399999999999999\n",
+ "0.0256\n",
+ "0.0144\n",
+ "0.006399999999999998\n",
+ "max area is: 0.6399999999999999\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Areas in the current list order\n",
+ "for b in bboxes:\n",
+ "    print(b.area)\n",
+ "# list.sort() sorts in place and returns None, so its result must not be\n",
+ "# subscripted; the largest box is read from bboxes[0] after sorting\n",
+ "bboxes.sort(key=operator.attrgetter('area'), reverse=True)\n",
+ "print('')\n",
+ "# Areas after sorting, largest first\n",
+ "for b in bboxes:\n",
+ "    print(b.area)\n",
+ "\n",
+ "print('max area is: {}'.format(bboxes[0].area))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Order the boxes from largest to smallest area, in place\n",
+ "bboxes.sort(key=lambda box: box.area, reverse=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[<app.models.bbox.BBox at 0x7ff360047978>,\n",
+ " <app.models.bbox.BBox at 0x7ff3600479b0>,\n",
+ " <app.models.bbox.BBox at 0x7ff3600479e8>,\n",
+ " <app.models.bbox.BBox at 0x7ff360047a20>]"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bboxes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:megapixels]",
+ "language": "python",
+ "name": "conda-env-megapixels-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}