diff options
| author | adamhrv <adam@ahprojects.com> | 2019-02-12 15:18:46 +0100 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2019-02-12 15:18:46 +0100 |
| commit | a5bdab8e798fcdc7885cfdabb0e5dd8076fa1d40 (patch) | |
| tree | 1e7a45a8d2c746994584cc5f8e4ccdabad82f8d8 /megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb | |
| parent | e95455a8a4013dafdeb7e41cfa8fb1f3ccc28dbb (diff) | |
reorder nbs
Diffstat (limited to 'megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb | 510 |
1 files changed, 0 insertions, 510 deletions
diff --git a/megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb b/megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb deleted file mode 100644 index 039614f0..00000000 --- a/megapixels/notebooks/datasets/lfw/lfw_make_identity_csv.ipynb +++ /dev/null @@ -1,510 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Add identity ID to index" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from os.path import join\n", - "from pathlib import Path\n", - "import difflib\n", - "\n", - "from tqdm import tqdm_notebook as tqdm\n", - "import pandas as pd\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# names\n", - "DATA_STORE = '/data_store_ssd/'\n", - "dir_dataset = 'datasets/people/lfw/metadata'" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# split records into index and uuids\n", - "fp_identity_in = join(DATA_STORE, dir_dataset, 'identities_old.csv')\n", - "fp_identity_out = join(DATA_STORE, dir_dataset, 'identity_lookup.csv')\n", - "\n", - "df_identity = pd.read_csv(fp_identity_in).set_index('index')" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>name</th>\n", - " <th>name_orig</th>\n", - " <th>description</th>\n", - " <th>gender</th>\n", - " <th>images</th>\n", - " <th>image_index</th>\n", - " </tr>\n", - " <tr>\n", - " <th>index</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>A. J. Cook</td>\n", - " <td>AJ Cook</td>\n", - " <td>Canadian actress</td>\n", - " <td>f</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>AJ Lamas</td>\n", - " <td>AJ Lamas</td>\n", - " <td>American actor</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>Aaron Eckhart</td>\n", - " <td>Aaron Eckhart</td>\n", - " <td>American actor</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>Aaron Guiel</td>\n", - " <td>Aaron Guiel</td>\n", - " <td>Professional baseball player</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>Aaron Patterson</td>\n", - " <td>Aaron Patterson</td>\n", - " <td>Author</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " name name_orig description gender \\\n", - "index \n", - "0 A. J. Cook AJ Cook Canadian actress f \n", - "1 AJ Lamas AJ Lamas American actor m \n", - "2 Aaron Eckhart Aaron Eckhart American actor m \n", - "3 Aaron Guiel Aaron Guiel Professional baseball player m \n", - "4 Aaron Patterson Aaron Patterson Author m \n", - "\n", - " images image_index \n", - "index \n", - "0 1 0 \n", - "1 1 1 \n", - "2 1 2 \n", - "3 1 3 \n", - "4 1 4 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_identity.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>name</th>\n", - " <th>name_orig</th>\n", - " <th>description</th>\n", - " <th>gender</th>\n", - " <th>images</th>\n", - " <th>image_index</th>\n", - " <th>subdir</th>\n", - " </tr>\n", - " <tr>\n", - " <th>index</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>A. J. Cook</td>\n", - " <td>AJ Cook</td>\n", - " <td>Canadian actress</td>\n", - " <td>f</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td></td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>AJ Lamas</td>\n", - " <td>AJ Lamas</td>\n", - " <td>American actor</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td></td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " name name_orig description gender images image_index \\\n", - "index \n", - "0 A. J. Cook AJ Cook Canadian actress f 1 0 \n", - "1 AJ Lamas AJ Lamas American actor m 1 1 \n", - "\n", - " subdir \n", - "index \n", - "0 \n", - "1 " - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# associate each file with an identity\n", - "df_identity['subdir'] = [''] * len(df_identity)\n", - "df_identity.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ece5c11b90954b25b1f1e28fc2fe6b55", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(IntProgress(value=0, max=5749), HTML(value='')))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "for row in tqdm(df_identity.itertuples(), total=len(df_identity)):\n", - " name = row.name_orig\n", - " subdir = name.replace(' ','_')\n", - " df_identity.at[row.Index, 'subdir'] = subdir" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>name</th>\n", - " <th>name_orig</th>\n", - " <th>description</th>\n", - " <th>gender</th>\n", - " <th>images</th>\n", - " <th>image_index</th>\n", - " <th>subdir</th>\n", - " </tr>\n", - " <tr>\n", - " <th>index</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>A. J. Cook</td>\n", - " <td>AJ Cook</td>\n", - " <td>Canadian actress</td>\n", - " <td>f</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>AJ_Cook</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>AJ Lamas</td>\n", - " <td>AJ Lamas</td>\n", - " <td>American actor</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>AJ_Lamas</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>Aaron Eckhart</td>\n", - " <td>Aaron Eckhart</td>\n", - " <td>American actor</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>Aaron_Eckhart</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>Aaron Guiel</td>\n", - " <td>Aaron Guiel</td>\n", - " <td>Professional baseball player</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>Aaron_Guiel</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>Aaron Patterson</td>\n", - " <td>Aaron Patterson</td>\n", - " <td>Author</td>\n", - " <td>m</td>\n", - " <td>1</td>\n", - " <td>4</td>\n", - " <td>Aaron_Patterson</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " name name_orig description gender \\\n", - "index \n", - "0 A. J. Cook AJ Cook Canadian actress f \n", - "1 AJ Lamas AJ Lamas American actor m \n", - "2 Aaron Eckhart Aaron Eckhart American actor m \n", - "3 Aaron Guiel Aaron Guiel Professional baseball player m \n", - "4 Aaron Patterson Aaron Patterson Author m \n", - "\n", - " images image_index subdir \n", - "index \n", - "0 1 0 AJ_Cook \n", - "1 1 1 AJ_Lamas \n", - "2 1 2 Aaron_Eckhart \n", - "3 1 3 Aaron_Guiel \n", - "4 1 4 Aaron_Patterson " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_identity.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "df_identity.to_csv(fp_identity_out)" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": {}, - "outputs": [], - "source": [ - "# make a clean index separate from files" - ] - }, - { - "cell_type": "code", - "execution_count": 145, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'AJ Lamas'" - ] - }, - "execution_count": 145, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#df_identies = pd.read_csv('identities.csv')\n", - "df_identities.iloc[1]['name']" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 2 3 4\n" - ] - } - ], - "source": [ - "a = [1,2,3,4]\n", - "\n", - "print(*a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:megapixels]", - "language": "python", - "name": "conda-env-megapixels-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} |
