summary refs log tree commit diff
path: root/megapixels/notebooks
diff options
context:
space:
mode:
author adamhrv <adam@ahprojects.com> 2018-12-17 01:37:31 +0100
committer adamhrv <adam@ahprojects.com> 2018-12-17 01:37:31 +0100
commit 88ec48e1c4d93ba9cd3aa186c068ef2aa4c27c56 (patch)
tree 506075c0c8f0d4bbf15e97c6db50b6e055c5bd4e /megapixels/notebooks
parent 23e9fef5dce8b0b15dd94713816b9d7d45f12356 (diff)
fixing dataset processors
Diffstat (limited to 'megapixels/notebooks')
-rw-r--r-- megapixels/notebooks/_local_scratch.ipynb 196
-rw-r--r-- megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb 510
-rw-r--r-- megapixels/notebooks/examples/face_recognition_demo.ipynb 4
3 files changed, 617 insertions, 93 deletions
diff --git a/megapixels/notebooks/_local_scratch.ipynb b/megapixels/notebooks/_local_scratch.ipynb
index 167b6ddd..cee17cba 100644
--- a/megapixels/notebooks/_local_scratch.ipynb
+++ b/megapixels/notebooks/_local_scratch.ipynb
@@ -1,161 +1,173 @@
{
"cells": [
{
- "cell_type": "code",
- "execution_count": 1,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "import pandas as pd\n",
- "import cv2 as cv\n",
- "import numpy as np\n",
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt"
+ "# Scratch pad"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "import sys\n",
"from glob import glob\n",
"from os.path import join\n",
"from pathlib import Path\n",
+ "import random\n",
+ "\n",
+ "import pandas as pd\n",
+ "import cv2 as cv\n",
+ "import numpy as np\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import sys\n",
"sys.path.append('/work/megapixels_dev/megapixels')\n",
"from app.models.bbox import BBox\n",
- "#from app.utils import im_utils\n",
- "import random"
+ "from app.utils import im_utils, file_utils"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "dir_ims = '/data_store_ssd/apps/megapixels/datasets/umd_faces/faces/'"
+ "a= [1]"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0\n"
- ]
+ "data": {
+ "text/plain": [
+ "1"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "fp_ims = glob(join(dir_ims, '*.png'))\n",
- "print(len(fp_ims))"
+ "a[-1]"
]
},
{
"cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Help on function choice in module random:\n",
- "\n",
- "choice(self, seq)\n",
- " Choose a random element from a non-empty sequence.\n",
- "\n"
- ]
- }
- ],
- "source": [
- "help(random.sample)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[1, 8, 0, 6, 3] True\n"
- ]
- }
- ],
- "source": [
- "a = list(range(0,10))\n",
- "b = random.sample(a, 5)\n",
- "print(b, len(set(b))==5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
- "from random import randint\n",
- "imu"
+ "fp_filepath = '/data_store_ssd/datasets/people/lfw/metadata/filepath.csv'\n",
+ "df_filepath = pd.read_csv(fp_filepath)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
- "import face_alignment\n",
- "from skimage import io\n",
- "\n",
- "fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, flip_input=False, device='cuda')"
+ "image_index = 12467"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 55,
"metadata": {},
- "outputs": [],
- "source": [
- "fp_im = np.random.choice(fp_ims)\n",
- "im = io.imread(fp_im)\n",
- "preds = fa.get_landmarks(im)\n",
- "print(preds[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "12474\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "index 12851\n",
+ "ext jpg\n",
+ "fn Vladimir_Putin_0029\n",
+ "subdir Vladimir_Putin\n",
+ "Name: 12474, dtype: object"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "import json"
+ "image_index += 1\n",
+ "print(image_index)\n",
+ "df_filepath.iloc[image_index]"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
- "print(len(preds[0]))\n"
+ "import imutils"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 57,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Help on function build_montages in module imutils.convenience:\n",
+ "\n",
+ "build_montages(image_list, image_shape, montage_shape)\n",
+ " ---------------------------------------------------------------------------------------------\n",
+ " author: Kyle Hounslow\n",
+ " ---------------------------------------------------------------------------------------------\n",
+ " Converts a list of single images into a list of 'montage' images of specified rows and columns.\n",
+ " A new montage image is started once rows and columns of montage image is filled.\n",
+ " Empty space of incomplete montage images are filled with black pixels\n",
+ " ---------------------------------------------------------------------------------------------\n",
+ " :param image_list: python list of input images\n",
+ " :param image_shape: tuple, size each image will be resized to for display (width, height)\n",
+ " :param montage_shape: tuple, shape of image montage (width, height)\n",
+ " :return: list of montage images in numpy array format\n",
+ " ---------------------------------------------------------------------------------------------\n",
+ " \n",
+ " example usage:\n",
+ " \n",
+ " # load single image\n",
+ " img = cv2.imread('lena.jpg')\n",
+ " # duplicate image 25 times\n",
+ " num_imgs = 25\n",
+ " img_list = []\n",
+ " for i in xrange(num_imgs):\n",
+ " img_list.append(img)\n",
+ " # convert image list into a montage of 256x256 images tiled in a 5x5 montage\n",
+ " montages = make_montages_of_images(img_list, (256, 256), (5, 5))\n",
+ " # iterate through montages and display\n",
+ " for montage in montages:\n",
+ " cv2.imshow('montage image', montage)\n",
+ " cv2.waitKey(0)\n",
+ " \n",
+ " ----------------------------------------------------------------------------------------------\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
- "with open('test.json', 'w') as fp:\n",
- " json.dump(preds[0].tolist(), fp)"
+ "help(imutils.build_montages)"
]
},
{
@@ -182,7 +194,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.5"
+ "version": "3.6.6"
}
},
"nbformat": 4,
diff --git a/megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb b/megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb
new file mode 100644
index 00000000..039614f0
--- /dev/null
+++ b/megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb
@@ -0,0 +1,510 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Add identity ID to index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from os.path import join\n",
+ "from pathlib import Path\n",
+ "import difflib\n",
+ "\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# names\n",
+ "DATA_STORE = '/data_store_ssd/'\n",
+ "dir_dataset = 'datasets/people/lfw/metadata'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# split records into index and uuids\n",
+ "fp_identity_in = join(DATA_STORE, dir_dataset, 'identities_old.csv')\n",
+ "fp_identity_out = join(DATA_STORE, dir_dataset, 'identity_lookup.csv')\n",
+ "\n",
+ "df_identity = pd.read_csv(fp_identity_in).set_index('index')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>name_orig</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>image_index</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>A. J. Cook</td>\n",
+ " <td>AJ Cook</td>\n",
+ " <td>Canadian actress</td>\n",
+ " <td>f</td>\n",
+ " <td>1</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>American actor</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>Aaron Eckhart</td>\n",
+ " <td>Aaron Eckhart</td>\n",
+ " <td>American actor</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>Aaron Guiel</td>\n",
+ " <td>Aaron Guiel</td>\n",
+ " <td>Professional baseball player</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>3</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>Aaron Patterson</td>\n",
+ " <td>Aaron Patterson</td>\n",
+ " <td>Author</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>4</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name name_orig description gender \\\n",
+ "index \n",
+ "0 A. J. Cook AJ Cook Canadian actress f \n",
+ "1 AJ Lamas AJ Lamas American actor m \n",
+ "2 Aaron Eckhart Aaron Eckhart American actor m \n",
+ "3 Aaron Guiel Aaron Guiel Professional baseball player m \n",
+ "4 Aaron Patterson Aaron Patterson Author m \n",
+ "\n",
+ " images image_index \n",
+ "index \n",
+ "0 1 0 \n",
+ "1 1 1 \n",
+ "2 1 2 \n",
+ "3 1 3 \n",
+ "4 1 4 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_identity.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>name_orig</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>image_index</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>A. J. Cook</td>\n",
+ " <td>AJ Cook</td>\n",
+ " <td>Canadian actress</td>\n",
+ " <td>f</td>\n",
+ " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td></td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>American actor</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td></td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name name_orig description gender images image_index \\\n",
+ "index \n",
+ "0 A. J. Cook AJ Cook Canadian actress f 1 0 \n",
+ "1 AJ Lamas AJ Lamas American actor m 1 1 \n",
+ "\n",
+ " subdir \n",
+ "index \n",
+ "0 \n",
+ "1 "
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# associate each file with an identity\n",
+ "df_identity['subdir'] = [''] * len(df_identity)\n",
+ "df_identity.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ece5c11b90954b25b1f1e28fc2fe6b55",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=5749), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for row in tqdm(df_identity.itertuples(), total=len(df_identity)):\n",
+ " name = row.name_orig\n",
+ " subdir = name.replace(' ','_')\n",
+ " df_identity.at[row.Index, 'subdir'] = subdir"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>name_orig</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>image_index</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>A. J. Cook</td>\n",
+ " <td>AJ Cook</td>\n",
+ " <td>Canadian actress</td>\n",
+ " <td>f</td>\n",
+ " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>AJ_Cook</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>AJ Lamas</td>\n",
+ " <td>American actor</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>AJ_Lamas</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>Aaron Eckhart</td>\n",
+ " <td>Aaron Eckhart</td>\n",
+ " <td>American actor</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>2</td>\n",
+ " <td>Aaron_Eckhart</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>Aaron Guiel</td>\n",
+ " <td>Aaron Guiel</td>\n",
+ " <td>Professional baseball player</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>3</td>\n",
+ " <td>Aaron_Guiel</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>Aaron Patterson</td>\n",
+ " <td>Aaron Patterson</td>\n",
+ " <td>Author</td>\n",
+ " <td>m</td>\n",
+ " <td>1</td>\n",
+ " <td>4</td>\n",
+ " <td>Aaron_Patterson</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name name_orig description gender \\\n",
+ "index \n",
+ "0 A. J. Cook AJ Cook Canadian actress f \n",
+ "1 AJ Lamas AJ Lamas American actor m \n",
+ "2 Aaron Eckhart Aaron Eckhart American actor m \n",
+ "3 Aaron Guiel Aaron Guiel Professional baseball player m \n",
+ "4 Aaron Patterson Aaron Patterson Author m \n",
+ "\n",
+ " images image_index subdir \n",
+ "index \n",
+ "0 1 0 AJ_Cook \n",
+ "1 1 1 AJ_Lamas \n",
+ "2 1 2 Aaron_Eckhart \n",
+ "3 1 3 Aaron_Guiel \n",
+ "4 1 4 Aaron_Patterson "
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_identity.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_identity.to_csv(fp_identity_out)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# make a clean index separate from files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'AJ Lamas'"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#df_identies = pd.read_csv('identities.csv')\n",
+ "df_identities.iloc[1]['name']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 2 3 4\n"
+ ]
+ }
+ ],
+ "source": [
+ "a = [1,2,3,4]\n",
+ "\n",
+ "print(*a)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:megapixels]",
+ "language": "python",
+ "name": "conda-env-megapixels-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/megapixels/notebooks/examples/face_recognition_demo.ipynb b/megapixels/notebooks/examples/face_recognition_demo.ipynb
index 68c5f3b6..804c63b6 100644
--- a/megapixels/notebooks/examples/face_recognition_demo.ipynb
+++ b/megapixels/notebooks/examples/face_recognition_demo.ipynb
@@ -402,7 +402,9 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "import imutils"
+ ]
},
{
"cell_type": "code",