{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Add identity ID to index\n", "\n", "Loads the LFW identity metadata (`identities_old.csv`), adds a `subdir` column derived from `name_orig` (spaces replaced with underscores), and writes the result to `identity_lookup.csv`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import difflib\n", "import os\n", "from os.path import join\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "import pandas as pd\n", "# tqdm_notebook is deprecated; tqdm.auto selects the notebook widget or the console bar\n", "from tqdm.auto import tqdm" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# data locations; set the DATA_STORE environment variable to override the default root\n", "DATA_STORE = os.environ.get('DATA_STORE', '/data_store_ssd/')\n", "dir_dataset = 'datasets/people/lfw/metadata'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# build the input/output CSV paths, then load the identity table keyed by its 'index' column\n", "fp_identity_in = join(DATA_STORE, dir_dataset, 'identities_old.csv')\n", "fp_identity_out = join(DATA_STORE, dir_dataset, 'identity_lookup.csv')\n", "\n", "df_identity = pd.read_csv(fp_identity_in, index_col='index')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namename_origdescriptiongenderimagesimage_index
index
0A. J. CookAJ CookCanadian actressf10
1AJ LamasAJ LamasAmerican actorm11
2Aaron EckhartAaron EckhartAmerican actorm12
3Aaron GuielAaron GuielProfessional baseball playerm13
4Aaron PattersonAaron PattersonAuthorm14
\n", "
" ], "text/plain": [ " name name_orig description gender \\\n", "index \n", "0 A. J. Cook AJ Cook Canadian actress f \n", "1 AJ Lamas AJ Lamas American actor m \n", "2 Aaron Eckhart Aaron Eckhart American actor m \n", "3 Aaron Guiel Aaron Guiel Professional baseball player m \n", "4 Aaron Patterson Aaron Patterson Author m \n", "\n", " images image_index \n", "index \n", "0 1 0 \n", "1 1 1 \n", "2 1 2 \n", "3 1 3 \n", "4 1 4 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# preview the first five rows of the loaded identity table\n", "df_identity.head(n=5)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namename_origdescriptiongenderimagesimage_indexsubdir
index
0A. J. CookAJ CookCanadian actressf10
1AJ LamasAJ LamasAmerican actorm11
\n", "
" ], "text/plain": [ " name name_orig description gender images image_index \\\n", "index \n", "0 A. J. Cook AJ Cook Canadian actress f 1 0 \n", "1 AJ Lamas AJ Lamas American actor m 1 1 \n", "\n", " subdir \n", "index \n", "0 \n", "1 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# associate each file with an identity: start with an empty `subdir` column,\n", "# which the next cell fills from `name_orig`\n", "df_identity['subdir'] = ''\n", "df_identity.head(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# `subdir` is `name_orig` with every space replaced by an underscore;\n", "# a single vectorized string op replaces the per-row loop of .at writes\n", "df_identity['subdir'] = df_identity['name_orig'].str.replace(' ', '_', regex=False)\n", "\n", "# sanity check: no `subdir` value may still contain a space\n", "assert not df_identity['subdir'].str.contains(' ', regex=False, na=False).any()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namename_origdescriptiongenderimagesimage_indexsubdir
index
0A. J. CookAJ CookCanadian actressf10AJ_Cook
1AJ LamasAJ LamasAmerican actorm11AJ_Lamas
2Aaron EckhartAaron EckhartAmerican actorm12Aaron_Eckhart
3Aaron GuielAaron GuielProfessional baseball playerm13Aaron_Guiel
4Aaron PattersonAaron PattersonAuthorm14Aaron_Patterson
\n", "
" ], "text/plain": [ " name name_orig description gender \\\n", "index \n", "0 A. J. Cook AJ Cook Canadian actress f \n", "1 AJ Lamas AJ Lamas American actor m \n", "2 Aaron Eckhart Aaron Eckhart American actor m \n", "3 Aaron Guiel Aaron Guiel Professional baseball player m \n", "4 Aaron Patterson Aaron Patterson Author m \n", "\n", " images image_index subdir \n", "index \n", "0 1 0 AJ_Cook \n", "1 1 1 AJ_Lamas \n", "2 1 2 Aaron_Eckhart \n", "3 1 3 Aaron_Guiel \n", "4 1 4 Aaron_Patterson " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_identity.head()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "df_identity.to_csv(fp_identity_out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Make a clean index separate from files\n", "\n", "**TODO**: no cell in this notebook implements this step yet." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# NOTE(review): `df_identities` was never defined in this notebook (the only trace was a\n", "# commented-out read of 'identities.csv'), so this cell failed on a fresh kernel.\n", "# It now reloads the lookup file written above -- confirm this is the intended frame.\n", "df_identities = pd.read_csv(fp_identity_out, index_col='index')\n", "\n", "# round-trip check: the reloaded file must agree with the in-memory table\n", "assert df_identities.iloc[1]['name'] == df_identity.iloc[1]['name']\n", "df_identities.iloc[1]['name']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:megapixels]", "language": "python", "name": "conda-env-megapixels-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }