summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/identity/vgg_face2_clean_meta_kg.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/identity/vgg_face2_clean_meta_kg.ipynb')
-rw-r--r--megapixels/notebooks/datasets/identity/vgg_face2_clean_meta_kg.ipynb2020
1 files changed, 2020 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/identity/vgg_face2_clean_meta_kg.ipynb b/megapixels/notebooks/datasets/identity/vgg_face2_clean_meta_kg.ipynb
new file mode 100644
index 00000000..91ca1626
--- /dev/null
+++ b/megapixels/notebooks/datasets/identity/vgg_face2_clean_meta_kg.ipynb
@@ -0,0 +1,2020 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Append UUID to SHA256 CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 186,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from os.path import join\n",
+ "from pathlib import Path\n",
+ "import difflib\n",
+ "\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/data_store_nas/datasets/people/vgg_face2/metadata/records_uuid.csv True\n",
+ "/data_store_nas/datasets/people/vgg_face2/metadata/records.csv True\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Root of the NAS mount and the VGG Face2 metadata directory beneath it.\n",
+ "DATA_STORE_NAS = '/data_store_nas/'\n",
+ "dir_dataset = 'datasets/people/vgg_face2/metadata'\n",
+ "# Alias for the misspelled name, in case other cells still refer to it.\n",
+ "dir_datset = dir_dataset\n",
+ "\n",
+ "# UUID CSVs: records_uuid.csv is read below; uuids.csv is written later.\n",
+ "fp_records_uuids = join(DATA_STORE_NAS, dir_dataset, 'records_uuid.csv')\n",
+ "fp_uuids_new = join(DATA_STORE_NAS, dir_dataset, 'uuids.csv')\n",
+ "\n",
+ "# Record CSVs: records.csv is read below; records_new.csv is written later.\n",
+ "fp_records = join(DATA_STORE_NAS, dir_dataset, 'records.csv')\n",
+ "fp_records_new = join(DATA_STORE_NAS, dir_dataset, 'records_new.csv')\n",
+ "\n",
+ "# Sanity check: print each input path and whether the file exists.\n",
+ "print(fp_records_uuids, Path(fp_records_uuids).is_file())\n",
+ "print(fp_records, Path(fp_records).is_file())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def similarity(a, b):\n",
+ "    \"\"\"Return the difflib match ratio of `a` and `b`, compared case-insensitively.\"\"\"\n",
+ "    lhs, rhs = a.lower(), b.lower()\n",
+ "    return difflib.SequenceMatcher(None, lhs, rhs).ratio()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the records table and the variant of it that also carries a uuid column.\n",
+ "df_records, df_records_uuids = (\n",
+ "    pd.read_csv(fp) for fp in (fp_records, fp_records_uuids)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>sha256</th>\n",
+ " <th>subdir</th>\n",
+ " <th>uuid</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10...</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>88ac6abd-6039-442b-b31f-2db8d575363a</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>e360f93613baa68cede6731d2603873cdabd3993841cfd...</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>73acbc00-2cb5-4260-8db3-b88ca7c29c72</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn sha256 \\\n",
+ "0 jpg 0089_01 a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10... \n",
+ "1 jpg 0168_01 e360f93613baa68cede6731d2603873cdabd3993841cfd... \n",
+ "\n",
+ " subdir uuid \n",
+ "0 test/n006211 88ac6abd-6039-442b-b31f-2db8d575363a \n",
+ "1 test/n006211 73acbc00-2cb5-4260-8db3-b88ca7c29c72 "
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the records-with-UUID table (`df_uuids` is not created until a later cell).\n",
+ "df_records_uuids.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>sha256</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>e360f93613baa68cede6731d2603873cdabd3993841cfd...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn sha256 \\\n",
+ "0 jpg 0089_01 a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10... \n",
+ "1 jpg 0168_01 e360f93613baa68cede6731d2603873cdabd3993841cfd... \n",
+ "\n",
+ " subdir \n",
+ "0 test/n006211 \n",
+ "1 test/n006211 "
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first two rows of the plain records table.\n",
+ "df_records.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fix up the records before saving them to a new CSV: start by adding an\n",
+ "# 'index' column of empty-string placeholders.\n",
+ "n_records = len(df_records)\n",
+ "df_records['index'] = [''] * n_records"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copy each row's index label into the 'index' column with one vectorized\n",
+ "# assignment instead of a Python-level iterrows() loop over every row.\n",
+ "df_records['index'] = df_records.index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>sha256</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>e360f93613baa68cede6731d2603873cdabd3993841cfd...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0213_01</td>\n",
+ " <td>3920a8bdf523a5bf7da9258ec414a741462d0cfbec8d2c...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0010_01</td>\n",
+ " <td>577ce218e4a61e612942c55fd172cac4b48becacbfc708...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0115_01</td>\n",
+ " <td>b27d37425a4e59dc4d37c3df331d0b69e4919338a3d46f...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn sha256 \\\n",
+ "index \n",
+ "0 jpg 0089_01 a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10... \n",
+ "1 jpg 0168_01 e360f93613baa68cede6731d2603873cdabd3993841cfd... \n",
+ "2 jpg 0213_01 3920a8bdf523a5bf7da9258ec414a741462d0cfbec8d2c... \n",
+ "3 jpg 0010_01 577ce218e4a61e612942c55fd172cac4b48becacbfc708... \n",
+ "4 jpg 0115_01 b27d37425a4e59dc4d37c3df331d0b69e4919338a3d46f... \n",
+ "\n",
+ " subdir \n",
+ "index \n",
+ "0 test/n006211 \n",
+ "1 test/n006211 \n",
+ "2 test/n006211 \n",
+ "3 test/n006211 \n",
+ "4 test/n006211 "
+ ]
+ },
+ "execution_count": 128,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Write the records table to the records_new.csv path.\n",
+ "# NOTE(review): set_index('index') was tried here and left commented out, so the\n",
+ "# frame is written with its current index plus whatever columns it holds - confirm\n",
+ "# that this is the intended layout.\n",
+ "df_records.to_csv(path_or_buf=fp_records_new)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 131,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Working copy of the records table for files.csv, and the path it is written to.\n",
+ "df_files = df_records.copy()\n",
+ "fp_files = join(DATA_STORE_NAS, dir_dataset, 'files.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 133,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# files.csv is the records table without the sha256 column. Deriving it from\n",
+ "# df_records (rather than dropping from df_files itself) keeps this cell re-runnable.\n",
+ "df_files = df_records.drop(columns=['sha256'])\n",
+ "df_files.to_csv(fp_files)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Make another CSV just for the sha: the records table minus ext, fn and subdir.\n",
+ "df_sha256s = df_records.drop(columns=['ext', 'fn', 'subdir'])\n",
+ "fp_sha256s = join(DATA_STORE_NAS, dir_dataset, 'sha256s.csv')\n",
+ "df_sha256s.to_csv(fp_sha256s)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>sha256</th>\n",
+ " <th>subdir</th>\n",
+ " <th>uuid</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10...</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>88ac6abd-6039-442b-b31f-2db8d575363a</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>e360f93613baa68cede6731d2603873cdabd3993841cfd...</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>73acbc00-2cb5-4260-8db3-b88ca7c29c72</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn sha256 \\\n",
+ "0 jpg 0089_01 a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10... \n",
+ "1 jpg 0168_01 e360f93613baa68cede6731d2603873cdabd3993841cfd... \n",
+ "\n",
+ " subdir uuid \n",
+ "0 test/n006211 88ac6abd-6039-442b-b31f-2db8d575363a \n",
+ "1 test/n006211 73acbc00-2cb5-4260-8db3-b88ca7c29c72 "
+ ]
+ },
+ "execution_count": 141,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Next step: create another CSV just for the UUIDs. Preview the source table first.\n",
+ "df_records_uuids.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 142,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Work on an independent copy so df_records_uuids stays untouched.\n",
+ "df_uuids = df_records_uuids.copy(deep=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Strip the file/hash columns. Dropping from df_records_uuids (not from df_uuids\n",
+ "# itself) means re-running this cell does not fail on already-removed columns.\n",
+ "df_uuids = df_records_uuids.drop(columns=['subdir', 'fn', 'ext', 'sha256'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>uuid</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>88ac6abd-6039-442b-b31f-2db8d575363a</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>73acbc00-2cb5-4260-8db3-b88ca7c29c72</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " uuid\n",
+ "0 88ac6abd-6039-442b-b31f-2db8d575363a\n",
+ "1 73acbc00-2cb5-4260-8db3-b88ca7c29c72"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the UUID table after the column drop.\n",
+ "df_uuids.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Name the index 'index', then write the UUID table to the uuids.csv path.\n",
+ "df_uuids.index.names = ['index']\n",
+ "df_uuids.to_csv(path_or_buf=fp_uuids_new)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NOTE(review): this cell has no execution count and cannot run as written:\n",
+ "#   - `subdir` is not assigned in this cell, while `sha256` is read but never used -\n",
+ "#     presumably the match was meant to be on sha256; confirm.\n",
+ "#   - `df_rois` is only loaded in a later cell.\n",
+ "#   - only the first two rows are iterated, but tqdm is given total=len(df_records_uuids).\n",
+ "for idx, row in tqdm(df_records_uuids[:2].iterrows(), total=len(df_records_uuids)):\n",
+ " sha256 = row['sha256']\n",
+ " row_match = df_records.loc[(df_records['subdir'] == subdir)]\n",
+ " df_rois.at[idx, 'idx'] = int(row_match.index[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Change ROI to use index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 372,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the ROI table from rois.csv in the dataset metadata directory.\n",
+ "fp_rois = join(DATA_STORE_NAS, dir_dataset, 'rois.csv')\n",
+ "df_rois = pd.read_csv(fp_rois)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 373,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>sha256</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>e360f93613baa68cede6731d2603873cdabd3993841cfd...</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn sha256 \\\n",
+ "index \n",
+ "0 jpg 0089_01 a39a1df855cb0c70dc553c5e9afa35b4f7c00f4011ca10... \n",
+ "1 jpg 0168_01 e360f93613baa68cede6731d2603873cdabd3993841cfd... \n",
+ "\n",
+ " subdir \n",
+ "index \n",
+ "0 test/n006211 \n",
+ "1 test/n006211 "
+ ]
+ },
+ "execution_count": 373,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Name the records index 'index' and preview the table.\n",
+ "df_records.index.names = ['index']\n",
+ "df_records.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 374,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Unnamed: 0</th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>h</th>\n",
+ " <th>image_height</th>\n",
+ " <th>image_width</th>\n",
+ " <th>subdir</th>\n",
+ " <th>w</th>\n",
+ " <th>x</th>\n",
+ " <th>y</th>\n",
+ " <th>index_new</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>0</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.473333</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.668246</td>\n",
+ " <td>0.279621</td>\n",
+ " <td>0.28</td>\n",
+ " <td>-1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.326667</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.464455</td>\n",
+ " <td>-0.156398</td>\n",
+ " <td>0.12</td>\n",
+ " <td>-1</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Unnamed: 0 ext fn h image_height image_width \\\n",
+ "index \n",
+ "0 0 jpg 0089_01 0.473333 304 214 \n",
+ "1 1 jpg 0089_01 0.326667 304 214 \n",
+ "\n",
+ " subdir w x y index_new \n",
+ "index \n",
+ "0 test/n006211 0.668246 0.279621 0.28 -1 \n",
+ "1 test/n006211 0.464455 -0.156398 0.12 -1 "
+ ]
+ },
+ "execution_count": 374,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Add an 'index_new' column filled with -1, name the index 'index', and preview.\n",
+ "n_rois = len(df_rois)\n",
+ "df_rois['index_new'] = [-1] * n_rois\n",
+ "df_rois.index.names = ['index']\n",
+ "df_rois.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 375,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Independent copy of the records table, used for the lookups that follow.\n",
+ "df_records_copy = df_records.copy(deep=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 376,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "9131\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Group the copied records by subdir and report how many groups there are.\n",
+ "df_records_subdirs = df_records_copy.groupby(by='subdir')\n",
+ "n_record_subdirs = len(df_records_subdirs)\n",
+ "print(n_record_subdirs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 377,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "9131\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Group the ROI rows by subdir as well and report the number of groups.\n",
+ "roi_subdir_groups = df_rois.groupby(by='subdir')\n",
+ "n_roi_subdirs = len(roi_subdir_groups)\n",
+ "print(n_roi_subdirs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 387,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "module 'pandas' has no attribute 'index'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-387-82023aa58c79>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mhelp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m: module 'pandas' has no attribute 'index'"
+ ]
+ }
+ ],
+ "source": [
+ "# pandas exposes the index class as `pd.Index`; `pd.index` does not exist.\n",
+ "help(pd.Index)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 390,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "81817\n"
+ ]
+ }
+ ],
+ "source": [
+ "# NOTE(review): `row` is not assigned in this cell; this prints whatever a previously\n",
+ "# run loop left behind, so the result depends on execution order.\n",
+ "print(row.Index)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 392,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "91680fc6bee04ce087a60be57ab5a58c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=9131), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# For every ROI row, store in 'index_new' the index label of the first record\n",
+ "# that has the same subdir and fn.\n",
+ "for subdir, record_group in tqdm(df_records_subdirs, total=df_records_subdirs.ngroups):\n",
+ "    # get_group raises KeyError if this subdir has no ROI rows\n",
+ "    roi_group = roi_subdir_groups.get_group(subdir)\n",
+ "\n",
+ "    # fn -> index label of the first record with that fn in this subdir; built once\n",
+ "    # per group so each ROI row is a dict lookup rather than a scan of the group\n",
+ "    fn_to_index = {}\n",
+ "    for record_index, fn in zip(record_group.index, record_group['fn']):\n",
+ "        fn_to_index.setdefault(fn, record_index)\n",
+ "\n",
+ "    # itertuples() is used because iterrows() is very slow\n",
+ "    for row in roi_group.itertuples():\n",
+ "        if row.fn not in fn_to_index:\n",
+ "            raise KeyError('no record for subdir: {}, fn: {}'.format(subdir, row.fn))\n",
+ "        df_rois.at[row.Index, 'index_new'] = int(fn_to_index[row.fn])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 411,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Unnamed: 0</th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>h</th>\n",
+ " <th>image_height</th>\n",
+ " <th>image_width</th>\n",
+ " <th>subdir</th>\n",
+ " <th>w</th>\n",
+ " <th>x</th>\n",
+ " <th>y</th>\n",
+ " <th>index_new</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>0</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.473333</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.668246</td>\n",
+ " <td>0.279621</td>\n",
+ " <td>0.28</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.326667</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.464455</td>\n",
+ " <td>-0.156398</td>\n",
+ " <td>0.12</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Unnamed: 0 ext fn h image_height image_width \\\n",
+ "index \n",
+ "0 0 jpg 0089_01 0.473333 304 214 \n",
+ "1 1 jpg 0089_01 0.326667 304 214 \n",
+ "\n",
+ " subdir w x y index_new \n",
+ "index \n",
+ "0 test/n006211 0.668246 0.279621 0.28 0 \n",
+ "1 test/n006211 0.464455 -0.156398 0.12 0 "
+ ]
+ },
+ "execution_count": 411,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Continue on a separate copy so df_rois itself is left as it is.\n",
+ "df_rois_new = df_rois.copy(deep=True)\n",
+ "df_rois_new.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 413,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove every column whose name contains 'unnamed' (case-insensitive).\n",
+ "is_unnamed = df_rois_new.columns.str.contains('unnamed', case=False)\n",
+ "df_rois_new = df_rois_new.drop(columns=df_rois_new.columns[is_unnamed])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 422,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Make 'index_new' the index of the ROI table.\n",
+ "df_rois_new = df_rois_new.set_index(keys='index_new')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 423,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['ext', 'fn', 'h', 'image_height', 'image_width', 'subdir', 'w', 'x', 'y']"
+ ]
+ },
+ "execution_count": 423,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# List the remaining column names.\n",
+ "df_rois_new.columns.tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 425,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>h</th>\n",
+ " <th>image_height</th>\n",
+ " <th>image_width</th>\n",
+ " <th>subdir</th>\n",
+ " <th>w</th>\n",
+ " <th>x</th>\n",
+ " <th>y</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index_new</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.473333</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.668246</td>\n",
+ " <td>0.279621</td>\n",
+ " <td>0.280000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.326667</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.464455</td>\n",
+ " <td>-0.156398</td>\n",
+ " <td>0.120000</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>471</td>\n",
+ " <td>419</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.443609</td>\n",
+ " <td>0.263158</td>\n",
+ " <td>0.273333</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0213_01</td>\n",
+ " <td>0.462745</td>\n",
+ " <td>408</td>\n",
+ " <td>480</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>0.246667</td>\n",
+ " <td>0.082353</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0115_01</td>\n",
+ " <td>0.438662</td>\n",
+ " <td>360</td>\n",
+ " <td>401</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>0.286667</td>\n",
+ " <td>0.245353</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn h image_height image_width subdir \\\n",
+ "index_new \n",
+ "0 jpg 0089_01 0.473333 304 214 test/n006211 \n",
+ "0 jpg 0089_01 0.326667 304 214 test/n006211 \n",
+ "1 jpg 0168_01 0.393333 471 419 test/n006211 \n",
+ "2 jpg 0213_01 0.462745 408 480 test/n006211 \n",
+ "4 jpg 0115_01 0.438662 360 401 test/n006211 \n",
+ "\n",
+ " w x y \n",
+ "index_new \n",
+ "0 0.668246 0.279621 0.280000 \n",
+ "0 0.464455 -0.156398 0.120000 \n",
+ "1 0.443609 0.263158 0.273333 \n",
+ "2 0.393333 0.246667 0.082353 \n",
+ "4 0.393333 0.286667 0.245353 "
+ ]
+ },
+ "execution_count": 425,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the re-indexed ROI table (first five rows).\n",
+ "df_rois_new.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 426,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save the re-indexed ROI table as rois_new.csv in the dataset metadata directory.\n",
+ "fp_rois_new = join(DATA_STORE_NAS, dir_dataset, 'rois_new.csv')\n",
+ "df_rois_new.to_csv(path_or_buf=fp_rois_new)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Fix identity meta"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 458,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NOTE(review): fp_identity_meta is not assigned in any cell visible before this one;\n",
+ "# it has to be defined before this cell can run on a fresh kernel.\n",
+ "df_identity_meta = pd.read_csv(fp_identity_meta)\n",
+ "# read files.csv back and index it by its 'index' column\n",
+ "df_files = pd.read_csv(fp_files).set_index('index')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 459,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>subdir</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>test/n006211</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " ext fn subdir\n",
+ "index \n",
+ "0 jpg 0089_01 test/n006211\n",
+ "1 jpg 0168_01 test/n006211"
+ ]
+ },
+ "execution_count": 459,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the files table as read back from disk.\n",
+ "df_files.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 460,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>class_id</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>name</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>n009279</td>\n",
+ " <td>Former soccer player</td>\n",
+ " <td>f</td>\n",
+ " <td>365</td>\n",
+ " <td>Noriko Baba</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>n009278</td>\n",
+ " <td>Japanese singer-songwriter</td>\n",
+ " <td>f</td>\n",
+ " <td>181</td>\n",
+ " <td>Hiromi Satō</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " class_id description gender images name\n",
+ "0 n009279 Former soccer player f 365 Noriko Baba\n",
+ "1 n009278 Japanese singer-songwriter f 181 Hiromi Satō"
+ ]
+ },
+ "execution_count": 460,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the identity metadata table.\n",
+ "df_identity_meta.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 461,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Lookup table: class id (second path component of subdir) -> index label of\n",
+ "# the first row in df_files that belongs to that class.\n",
+ "class_ids = {}\n",
+ "for file_index, file_subdir in zip(df_files.index, df_files['subdir']):\n",
+ "    # setdefault keeps the first index seen for each class id\n",
+ "    class_ids.setdefault(file_subdir.split('/')[1], file_index)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 463,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add an 'index_new' column to the identity table, filled with -1.\n",
+ "n_identities = len(df_identity_meta)\n",
+ "df_identity_meta['index_new'] = [-1] * n_identities"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 464,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill 'index_new' for every identity with the value stored for its class_id\n",
+ "# in the class_ids lookup table.\n",
+ "for meta_index, meta_class_id in zip(df_identity_meta.index, df_identity_meta['class_id']):\n",
+ "    df_identity_meta.at[meta_index, 'index_new'] = class_ids[meta_class_id]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 465,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>class_id</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>name</th>\n",
+ " <th>index_new</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>n009279</td>\n",
+ " <td>Former soccer player</td>\n",
+ " <td>f</td>\n",
+ " <td>365</td>\n",
+ " <td>Noriko Baba</td>\n",
+ " <td>1808008</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>n009278</td>\n",
+ " <td>Japanese singer-songwriter</td>\n",
+ " <td>f</td>\n",
+ " <td>181</td>\n",
+ " <td>Hiromi Satō</td>\n",
+ " <td>943052</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>n009277</td>\n",
+ " <td>Japanese fashion model</td>\n",
+ " <td>m</td>\n",
+ " <td>409</td>\n",
+ " <td>Ranko Kanbe</td>\n",
+ " <td>595852</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>n009276</td>\n",
+ " <td>Japanese musician</td>\n",
+ " <td>f</td>\n",
+ " <td>177</td>\n",
+ " <td>Yurie Matsui</td>\n",
+ " <td>2922103</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>n009275</td>\n",
+ " <td>Japanese idol</td>\n",
+ " <td>f</td>\n",
+ " <td>501</td>\n",
+ " <td>Karin Miyamoto</td>\n",
+ " <td>1388262</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " class_id description gender images name \\\n",
+ "0 n009279 Former soccer player f 365 Noriko Baba \n",
+ "1 n009278 Japanese singer-songwriter f 181 Hiromi Satō \n",
+ "2 n009277 Japanese fashion model m 409 Ranko Kanbe \n",
+ "3 n009276 Japanese musician f 177 Yurie Matsui \n",
+ "4 n009275 Japanese idol f 501 Karin Miyamoto \n",
+ "\n",
+ " index_new \n",
+ "0 1808008 \n",
+ "1 943052 \n",
+ "2 595852 \n",
+ "3 2922103 \n",
+ "4 1388262 "
+ ]
+ },
+ "execution_count": 465,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# set_index returns a new frame rather than modifying in place, so assign it back;\n",
+ "# without the assignment 'index_new' stays an ordinary column and the index is unchanged\n",
+ "df_identity_meta = df_identity_meta.set_index('index_new')\n",
+ "df_identity_meta.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 466,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fp_identity_meta_new = join(DATA_STORE_NAS, dir_dataset, 'identity_meta_indexed.csv')\n",
+ "df_identity_meta.to_csv(fp_identity_meta_new)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fill df_rois['index'] with the index of the df_records row that has the same\n",
+ "# subdir and fn. Records are bucketed by subdir first, then by fn, in one pass,\n",
+ "# so each ROI row is resolved with two dict lookups instead of a table scan.\n",
+ "records_by_subdir = {}\n",
+ "for idx_record, subdir, fn in zip(df_records.index, df_records['subdir'], df_records['fn']):\n",
+ "    # setdefault keeps the first record seen for a (subdir, fn) pair\n",
+ "    records_by_subdir.setdefault(subdir, {}).setdefault(fn, idx_record)\n",
+ "\n",
+ "for idx, row in tqdm(df_rois.iterrows(), total=len(df_rois)):\n",
+ "    # raises KeyError if df_records has no row for this subdir/fn\n",
+ "    df_rois.at[idx, 'index'] = int(records_by_subdir[row['subdir']][row['fn']])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 187,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "04ad99a7cba9443ebee5b26a1c4cddf1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=3325795), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Exception ignored in: <bound method tqdm.__del__ of 0%| | 139/3325795 [01:34<123:32:18, 7.48it/s]>\n",
+ "Traceback (most recent call last):\n",
+ " File \"/home/adam/anaconda3/envs/megapixels/lib/python3.6/site-packages/tqdm/_tqdm.py\", line 931, in __del__\n",
+ " self.close()\n",
+ " File \"/home/adam/anaconda3/envs/megapixels/lib/python3.6/site-packages/tqdm/_tqdm.py\", line 1133, in close\n",
+ " self._decr_instances(self)\n",
+ " File \"/home/adam/anaconda3/envs/megapixels/lib/python3.6/site-packages/tqdm/_tqdm.py\", line 496, in _decr_instances\n",
+ " cls.monitor.exit()\n",
+ " File \"/home/adam/anaconda3/envs/megapixels/lib/python3.6/site-packages/tqdm/_monitor.py\", line 52, in exit\n",
+ " self.join()\n",
+ " File \"/home/adam/anaconda3/envs/megapixels/lib/python3.6/threading.py\", line 1053, in join\n",
+ " raise RuntimeError(\"cannot join current thread\")\n",
+ "RuntimeError: cannot join current thread\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m<ipython-input-187-f9325ab8bb02>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0msubdir\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'subdir'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mfn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'fn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mrow_match\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_records\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_records\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'subdir'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0msubdir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mrow_match\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrow_match\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow_match\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'fn'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mdf_rois\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mat\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'index'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow_match\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m~/anaconda3/envs/megapixels/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(self, other, axis)\u001b[0m\n\u001b[1;32m 1281\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1282\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1283\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mna_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1284\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1285\u001b[0m raise TypeError('Could not compare {typ} type with Series'\n",
+ "\u001b[0;32m~/anaconda3/envs/megapixels/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36mna_op\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 1141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1142\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1143\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_comp_method_OBJECT_ARRAY\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1145\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_datetimelike_v_numeric\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m~/anaconda3/envs/megapixels/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36m_comp_method_OBJECT_ARRAY\u001b[0;34m(op, x, y)\u001b[0m\n\u001b[1;32m 1120\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvec_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1121\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1122\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscalar_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1123\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1124\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "# Fill df_rois['index'] with the index of the df_records row sharing its subdir and fn.\n",
+ "# The lookup table is built in a single pass over df_records; filtering df_records with\n",
+ "# a boolean mask for every ROI row was estimated by tqdm at more than 120 hours.\n",
+ "record_index = {}\n",
+ "for idx_record, key in zip(df_records.index, zip(df_records['subdir'], df_records['fn'])):\n",
+ "    # setdefault keeps the first match, like taking .index[0] of the filtered rows\n",
+ "    record_index.setdefault(key, idx_record)\n",
+ "\n",
+ "roi_keys = zip(df_rois['subdir'], df_rois['fn'])\n",
+ "# a (subdir, fn) pair that is missing from df_records raises KeyError here\n",
+ "df_rois['index'] = [int(record_index[key]) for key in tqdm(roi_keys, total=len(df_rois))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 184,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pull the rows belonging to one subdir out of both tables for a side-by-side check\n",
+ "subdir = 'test/n006211'\n",
+ "rows_records = df_records[df_records['subdir'].eq(subdir)]\n",
+ "rows_rois = df_rois[df_rois['subdir'].eq(subdir)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 181,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>Unnamed: 0</th>\n",
+ " <th>ext</th>\n",
+ " <th>fn</th>\n",
+ " <th>h</th>\n",
+ " <th>image_height</th>\n",
+ " <th>image_width</th>\n",
+ " <th>subdir</th>\n",
+ " <th>w</th>\n",
+ " <th>x</th>\n",
+ " <th>y</th>\n",
+ " <th>index</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>0</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.473333</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.668246</td>\n",
+ " <td>0.279621</td>\n",
+ " <td>0.280000</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0089_01</td>\n",
+ " <td>0.326667</td>\n",
+ " <td>304</td>\n",
+ " <td>214</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.464455</td>\n",
+ " <td>-0.156398</td>\n",
+ " <td>0.120000</td>\n",
+ " <td>0</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>2</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0168_01</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>471</td>\n",
+ " <td>419</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.443609</td>\n",
+ " <td>0.263158</td>\n",
+ " <td>0.273333</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>3</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0213_01</td>\n",
+ " <td>0.462745</td>\n",
+ " <td>408</td>\n",
+ " <td>480</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>0.246667</td>\n",
+ " <td>0.082353</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>4</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0115_01</td>\n",
+ " <td>0.438662</td>\n",
+ " <td>360</td>\n",
+ " <td>401</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>0.286667</td>\n",
+ " <td>0.245353</td>\n",
+ " <td>4</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>5</th>\n",
+ " <td>5</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0511_02</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>417</td>\n",
+ " <td>415</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.395973</td>\n",
+ " <td>0.265101</td>\n",
+ " <td>0.233333</td>\n",
+ " <td>6</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>6</th>\n",
+ " <td>6</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0032_01</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>143</td>\n",
+ " <td>125</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.450382</td>\n",
+ " <td>0.255725</td>\n",
+ " <td>0.313333</td>\n",
+ " <td>7</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>7</th>\n",
+ " <td>7</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0502_01</td>\n",
+ " <td>0.326667</td>\n",
+ " <td>288</td>\n",
+ " <td>252</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.377863</td>\n",
+ " <td>0.603053</td>\n",
+ " <td>0.583333</td>\n",
+ " <td>8</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>8</th>\n",
+ " <td>8</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0502_01</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>288</td>\n",
+ " <td>252</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.450382</td>\n",
+ " <td>0.301527</td>\n",
+ " <td>0.313333</td>\n",
+ " <td>8</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>9</th>\n",
+ " <td>9</td>\n",
+ " <td>jpg</td>\n",
+ " <td>0201_01</td>\n",
+ " <td>0.393333</td>\n",
+ " <td>187</td>\n",
+ " <td>164</td>\n",
+ " <td>test/n006211</td>\n",
+ " <td>0.448669</td>\n",
+ " <td>0.243346</td>\n",
+ " <td>0.313333</td>\n",
+ " <td>9</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " Unnamed: 0 ext fn h image_height image_width \\\n",
+ "0 0 jpg 0089_01 0.473333 304 214 \n",
+ "1 1 jpg 0089_01 0.326667 304 214 \n",
+ "2 2 jpg 0168_01 0.393333 471 419 \n",
+ "3 3 jpg 0213_01 0.462745 408 480 \n",
+ "4 4 jpg 0115_01 0.438662 360 401 \n",
+ "5 5 jpg 0511_02 0.393333 417 415 \n",
+ "6 6 jpg 0032_01 0.393333 143 125 \n",
+ "7 7 jpg 0502_01 0.326667 288 252 \n",
+ "8 8 jpg 0502_01 0.393333 288 252 \n",
+ "9 9 jpg 0201_01 0.393333 187 164 \n",
+ "\n",
+ " subdir w x y index \n",
+ "0 test/n006211 0.668246 0.279621 0.280000 0 \n",
+ "1 test/n006211 0.464455 -0.156398 0.120000 0 \n",
+ "2 test/n006211 0.443609 0.263158 0.273333 1 \n",
+ "3 test/n006211 0.393333 0.246667 0.082353 2 \n",
+ "4 test/n006211 0.393333 0.286667 0.245353 4 \n",
+ "5 test/n006211 0.395973 0.265101 0.233333 6 \n",
+ "6 test/n006211 0.450382 0.255725 0.313333 7 \n",
+ "7 test/n006211 0.377863 0.603053 0.583333 8 \n",
+ "8 test/n006211 0.450382 0.301527 0.313333 8 \n",
+ "9 test/n006211 0.448669 0.243346 0.313333 9 "
+ ]
+ },
+ "execution_count": 181,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_rois.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_meta.to_csv(fp_out)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# build the path from DATA_STORE_NAS like the other cells do; if dir_dataset is already\n",
+ "# absolute, os.path.join ignores the prefix, so the resulting path is unchanged\n",
+ "fp_meta = join(DATA_STORE_NAS, dir_dataset, 'identity_meta_kg_clean.csv')\n",
+ "df = pd.read_csv(fp_meta)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# every value of the 'description' column, in row order\n",
+ "descs = df['description'].tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "descs = set(descs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 472,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fp_identity_meta = join(DATA_STORE_NAS, dir_dataset, 'identity_meta_indexed.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 473,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_identity_meta = pd.read_csv(fp_identity_meta)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 474,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>class_id</th>\n",
+ " <th>description</th>\n",
+ " <th>gender</th>\n",
+ " <th>images</th>\n",
+ " <th>name</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>n000001</td>\n",
+ " <td>Dalai Lama</td>\n",
+ " <td>m</td>\n",
+ " <td>424</td>\n",
+ " <td>14th Dalai Lama</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>n000002</td>\n",
+ " <td>American singer-songwriter</td>\n",
+ " <td>f</td>\n",
+ " <td>315</td>\n",
+ " <td>A Fine Frenzy</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>n000003</td>\n",
+ " <td>British writer</td>\n",
+ " <td>m</td>\n",
+ " <td>205</td>\n",
+ " <td>A. A. Gill</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>n000004</td>\n",
+ " <td>Canadian-Irish actor</td>\n",
+ " <td>m</td>\n",
+ " <td>387</td>\n",
+ " <td>AJ Buckley</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>n000005</td>\n",
+ " <td>Baseball catcher</td>\n",
+ " <td>m</td>\n",
+ " <td>229</td>\n",
+ " <td>AJ Pierzynski</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " class_id description gender images name\n",
+ "0 n000001 Dalai Lama m 424 14th Dalai Lama\n",
+ "1 n000002 American singer-songwriter f 315 A Fine Frenzy\n",
+ "2 n000003 British writer m 205 A. A. Gill\n",
+ "3 n000004 Canadian-Irish actor m 387 AJ Buckley\n",
+ "4 n000005 Baseball catcher m 229 AJ Pierzynski"
+ ]
+ },
+ "execution_count": 474,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_identity_meta.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 475,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_identity_meta.index.name = 'index'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 476,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_identity_meta.to_csv(fp_identity_meta)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:megapixels]",
+ "language": "python",
+ "name": "conda-env-megapixels-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}