{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Append UUID to SHA256 CSV"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import os\n",
"from os.path import join\n",
"from glob import glob\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import sys\n",
"sys.path.append('/work/megapixels_dev/megapixels/')\n",
"from app.utils import file_utils"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"DATA_STORE = '/data_store_nas/'\n",
"dir_dataset = join(DATA_STORE, 'datasets/people/lfw/')\n",
"dir_metadata = join(dir_dataset, 'metadata')"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"fp_files = join(dir_metadata, 'image_files.csv')\n",
"fp_rois = join(dir_metadata, 'image_rois.csv')\n",
"fp_identities = join(dir_metadata, 'identities.csv')\n",
"fp_pose = join(dir_metadata, 'image_pose.csv')"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"df_rois = pd.read_csv(fp_rois).set_index('index')\n",
"df_files = pd.read_csv(fp_files).set_index('index')\n",
"df_identities = pd.read_csv(fp_identities).set_index('index')"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" h | \n",
" image_height | \n",
" image_index | \n",
" image_width | \n",
" w | \n",
" x | \n",
" y | \n",
"
\n",
" \n",
" | index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.330000 | \n",
" 250 | \n",
" 0 | \n",
" 250 | \n",
" 0.330000 | \n",
" 0.336667 | \n",
" 0.350000 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.393333 | \n",
" 250 | \n",
" 1 | \n",
" 250 | \n",
" 0.393333 | \n",
" 0.286667 | \n",
" 0.313333 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" h image_height image_index image_width w x \\\n",
"index \n",
"0 0.330000 250 0 250 0.330000 0.336667 \n",
"1 0.393333 250 1 250 0.393333 0.286667 \n",
"\n",
" y \n",
"index \n",
"0 0.350000 \n",
"1 0.313333 "
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_rois.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ext | \n",
" fn | \n",
" subdir | \n",
"
\n",
" \n",
" | index | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" jpg | \n",
" AJ_Cook_0001 | \n",
" AJ_Cook | \n",
"
\n",
" \n",
" | 1 | \n",
" jpg | \n",
" AJ_Lamas_0001 | \n",
" AJ_Lamas | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ext fn subdir\n",
"index \n",
"0 jpg AJ_Cook_0001 AJ_Cook\n",
"1 jpg AJ_Lamas_0001 AJ_Lamas"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_files.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" description | \n",
" gender | \n",
" images | \n",
" image_index | \n",
"
\n",
" \n",
" | index | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A. J. Cook | \n",
" Canadian actress | \n",
" f | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" AJ Lamas | \n",
" American actor | \n",
" m | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name description gender images image_index\n",
"index \n",
"0 A. J. Cook Canadian actress f 1 0\n",
"1 AJ Lamas American actor m 1 1"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_identities.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5749"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_identities)"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13233"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_files)"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [],
"source": [
"indices_included = list(df_rois['image_index'])"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
"indices_all = list(range(13233))"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{6072, 1556, 11207}\n"
]
}
],
"source": [
"print(set(indices_all) - set(indices_included))"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm_notebook as tqdm"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [],
"source": [
"df_roi_groups = df_rois.groupby('image_index')"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "df3b603ad0464cb6983862fbf6716d02",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=13230), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"AJ_Cook_0001\n",
"0.33\n",
"\n"
]
}
],
"source": [
"for df_roi_group_idx, df_roi_group in tqdm(df_roi_groups):\n",
" image_index = df_roi_group.image_index.values[0]\n",
" pds_file = df_files.iloc[image_index]\n",
" print(pds_file.fn)\n",
" w = df_roi_group.w.values[0]\n",
" print(w)\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"from app.models.bbox import BBox"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"import operator\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"bboxes = []\n",
"dim = (250, 250)\n",
"bboxes.append(BBox.from_xywh_dim(10, 10, 40, 40, dim))\n",
"bboxes.append(BBox.from_xywh_dim(10, 10, 20, 20, dim))\n",
"bboxes.append(BBox.from_xywh_dim(10, 10, 200, 200, dim))\n",
"bboxes.append(BBox.from_xywh_dim(10, 10, 30, 30, dim))"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.6399999999999999\n",
"0.0256\n",
"0.0144\n",
"0.006399999999999998\n",
"\n",
"0.6399999999999999\n",
"0.0256\n",
"0.0144\n",
"0.006399999999999998\n",
"max area is: 0.6399999999999999\n"
]
}
],
"source": [
"for b in bboxes:\n",
" print(b.area)\n",
"bboxes.sort(key = operator.attrgetter('area'), reverse=True)[0]\n",
"print('')\n",
"for b in bboxes:\n",
" print(b.area)\n",
"\n",
"print('max area is: {}'.format(bboxes[0].area))"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"bboxes.sort(key = operator.attrgetter('area'), reverse=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[,\n",
" ,\n",
" ,\n",
" ]"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bboxes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:megapixels]",
"language": "python",
"name": "conda-env-megapixels-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}