diff options
Diffstat (limited to 'megapixels/notebooks/datasets/test_pd_funcs.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/test_pd_funcs.ipynb | 582 |
1 files changed, 582 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/test_pd_funcs.ipynb b/megapixels/notebooks/datasets/test_pd_funcs.ipynb new file mode 100644 index 00000000..62c2b0db --- /dev/null +++ b/megapixels/notebooks/datasets/test_pd_funcs.ipynb @@ -0,0 +1,582 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Append UUID to SHA256 CSV" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import os\n", + "from os.path import join\n", + "from glob import glob\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sys\n", + "sys.path.append('/work/megapixels_dev/megapixels/')\n", + "from app.utils import file_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_STORE = '/data_store_nas/'\n", + "dir_dataset = join(DATA_STORE, 'datasets/people/lfw/')\n", + "dir_metadata = join(dir_dataset, 'metadata')" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "fp_files = join(dir_metadata, 'image_files.csv')\n", + "fp_rois = join(dir_metadata, 'image_rois.csv')\n", + "fp_identities = join(dir_metadata, 'identities.csv')\n", + "fp_pose = join(dir_metadata, 'image_pose.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [], + "source": [ + "df_rois = pd.read_csv(fp_rois).set_index('index')\n", + "df_files = pd.read_csv(fp_files).set_index('index')\n", + "df_identities = pd.read_csv(fp_identities).set_index('index')" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: 
middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>h</th>\n", + " <th>image_height</th>\n", + " <th>image_index</th>\n", + " <th>image_width</th>\n", + " <th>w</th>\n", + " <th>x</th>\n", + " <th>y</th>\n", + " </tr>\n", + " <tr>\n", + " <th>index</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.330000</td>\n", + " <td>250</td>\n", + " <td>0</td>\n", + " <td>250</td>\n", + " <td>0.330000</td>\n", + " <td>0.336667</td>\n", + " <td>0.350000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.393333</td>\n", + " <td>250</td>\n", + " <td>1</td>\n", + " <td>250</td>\n", + " <td>0.393333</td>\n", + " <td>0.286667</td>\n", + " <td>0.313333</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " h image_height image_index image_width w x \\\n", + "index \n", + "0 0.330000 250 0 250 0.330000 0.336667 \n", + "1 0.393333 250 1 250 0.393333 0.286667 \n", + "\n", + " y \n", + "index \n", + "0 0.350000 \n", + "1 0.313333 " + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_rois.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>ext</th>\n", + " <th>fn</th>\n", + " <th>subdir</th>\n", + " </tr>\n", + " <tr>\n", + " <th>index</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>jpg</td>\n", + " <td>AJ_Cook_0001</td>\n", + " <td>AJ_Cook</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>jpg</td>\n", + " <td>AJ_Lamas_0001</td>\n", + " <td>AJ_Lamas</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " ext fn subdir\n", + "index \n", + "0 jpg AJ_Cook_0001 AJ_Cook\n", + "1 jpg AJ_Lamas_0001 AJ_Lamas" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_files.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>description</th>\n", + " <th>gender</th>\n", + " <th>images</th>\n", + " <th>image_index</th>\n", + " </tr>\n", + " <tr>\n", + " <th>index</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>A. J. 
Cook</td>\n", + " <td>Canadian actress</td>\n", + " <td>f</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>AJ Lamas</td>\n", + " <td>American actor</td>\n", + " <td>m</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " name description gender images image_index\n", + "index \n", + "0 A. J. Cook Canadian actress f 1 0\n", + "1 AJ Lamas American actor m 1 1" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_identities.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5749" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df_identities)" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13233" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df_files)" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [], + "source": [ + "# Image indices that have at least one ROI row.\n", + "indices_included = list(df_rois['image_index'])" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [], + "source": [ + "# Candidate indices 0..N-1, with N taken from the file table rather than a hard-coded 13233.\n", + "indices_all = list(range(len(df_files)))" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{6072, 1556, 11207}\n" + ] + } + ], + "source": [ + "# Image indices with no ROI row at all.\n", + "print(set(indices_all) - set(indices_included))" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm_notebook as tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [], + 
"source": [ + "df_roi_groups = df_rois.groupby('image_index')" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "df3b603ad0464cb6983862fbf6716d02", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=13230), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AJ_Cook_0001\n", + "0.33\n", + "\n" + ] + } + ], + "source": [ + "# Inspect only the first ROI group (note the trailing break) to sanity-check the join with df_files.\n", + "for df_roi_group_idx, df_roi_group in tqdm(df_roi_groups):\n", + " image_index = df_roi_group.image_index.values[0]\n", + " # df_files is keyed by its 'index' column (set_index when loaded), so look the row up by label, not by position.\n", + " pds_file = df_files.loc[image_index]\n", + " print(pds_file.fn)\n", + " w = df_roi_group.w.values[0]\n", + " print(w)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "from app.models.bbox import BBox" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import operator\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "bboxes = []\n", + "dim = (250, 250)\n", + "bboxes.append(BBox.from_xywh_dim(10, 10, 40, 40, dim))\n", + "bboxes.append(BBox.from_xywh_dim(10, 10, 20, 20, dim))\n", + "bboxes.append(BBox.from_xywh_dim(10, 10, 200, 200, dim))\n", + "bboxes.append(BBox.from_xywh_dim(10, 10, 30, 30, dim))" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6399999999999999\n", + "0.0256\n", + "0.0144\n", + "0.006399999999999998\n", + "\n", + "0.6399999999999999\n", + "0.0256\n", + "0.0144\n", + "0.006399999999999998\n", 
+ "max area is: 0.6399999999999999\n" + ] + } + ], + "source": [ + "# Print the box areas, sort the list largest-first in place, then print them again.\n", + "for b in bboxes:\n", + " print(b.area)\n", + "# list.sort() sorts in place and returns None, so its result must not be subscripted.\n", + "bboxes.sort(key = operator.attrgetter('area'), reverse=True)\n", + "print('')\n", + "for b in bboxes:\n", + " print(b.area)\n", + "\n", + "# After the descending sort the first element is the largest box.\n", + "print('max area is: {}'.format(bboxes[0].area))" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "bboxes.sort(key = operator.attrgetter('area'), reverse=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[<app.models.bbox.BBox at 0x7ff360047978>,\n", + " <app.models.bbox.BBox at 0x7ff3600479b0>,\n", + " <app.models.bbox.BBox at 0x7ff3600479e8>,\n", + " <app.models.bbox.BBox at 0x7ff360047a20>]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bboxes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:megapixels]", + "language": "python", + "name": "conda-env-megapixels-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} |
