{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Append UUID to SHA256 CSV\n",
    "\n",
    "Adds an `idx` column to the ROI table (`rois.csv`) holding, for each ROI, the row index of the record in `records.csv` that has the same `subdir` and `fn`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "from os.path import join\n",
    "import math\n",
    "from glob import glob\n",
    "from random import randint\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm import tqdm\n",
    "\n",
    "import sys\n",
    "sys.path.append('/work/megapixels_dev/megapixels/')\n",
    "from app.utils import file_utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_STORE = '/data_store_ssd/'\n",
    "dir_dataset = join(DATA_STORE, 'apps/megapixels/datasets/lfw')\n",
    "fp_shas = join(dir_dataset, 'records.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One name per file. Reusing a single `fp_in` for both inputs overwrote the\n",
    "# records path with the ROI path, so both tables were read from rois.csv.\n",
    "fp_records = fp_shas\n",
    "fp_rois = join(dir_dataset, 'rois.csv')\n",
    "fp_out = join(dir_dataset, 'rois_min.csv')\n",
    "\n",
    "# `fp_in` keeps the value it previously ended up with (rois.csv) so that any\n",
    "# cell still referring to it behaves as before.\n",
    "fp_in = fp_rois"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_records = pd.read_csv(fp_records)\n",
    "df_rois = pd.read_csv(fp_rois)\n",
    "nrecords = len(df_records)\n",
    "nrois = len(df_rois)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add explicit index to rois (placeholder; the next cell overwrites it\n",
    "# with the index of the matching record)\n",
    "df_rois['idx'] = [0] * nrois"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map every ROI to the row index of its image in the records table.\n",
    "#\n",
    "# The key is (subdir, fn). Matching on `subdir` alone gave every image in a\n",
    "# folder the index of that folder's first record.\n",
    "record_lookup = pd.DataFrame({\n",
    "    'subdir': df_records['subdir'].to_numpy(),\n",
    "    'fn': df_records['fn'].to_numpy(),\n",
    "    'idx': df_records.index.to_numpy(),\n",
    "}).drop_duplicates(subset=['subdir', 'fn'], keep='first')  # first match wins, like `.index[0]`\n",
    "\n",
    "# Lookup keys are unique, so the left merge returns exactly one row per ROI,\n",
    "# in the original ROI order.\n",
    "matched = df_rois[['subdir', 'fn']].merge(record_lookup, on=['subdir', 'fn'], how='left')\n",
    "\n",
    "# Fail loudly instead of writing NaN / garbage indices.\n",
    "unmatched = matched['idx'].isna()\n",
    "assert not unmatched.any(), f'{int(unmatched.sum())} ROI rows have no matching record'\n",
    "\n",
    "# Assign positionally so the result does not depend on index alignment.\n",
    "df_rois['idx'] = matched['idx'].astype(int).to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
| \n", " | ext | \n", "fn | \n", "h | \n", "image_height | \n", "image_width | \n", "subdir | \n", "w | \n", "x | \n", "y | \n", "idx | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "jpg | \n", "AJ_Cook_0001 | \n", "0.330000 | \n", "250 | \n", "250 | \n", "AJ_Cook | \n", "0.330000 | \n", "0.336667 | \n", "0.350000 | \n", "0 | \n", "
| 1 | \n", "jpg | \n", "AJ_Lamas_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "AJ_Lamas | \n", "0.393333 | \n", "0.286667 | \n", "0.313333 | \n", "1 | \n", "
| 2 | \n", "jpg | \n", "Aaron_Eckhart_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Eckhart | \n", "0.393333 | \n", "0.286667 | \n", "0.273333 | \n", "2 | \n", "
| 3 | \n", "jpg | \n", "Aaron_Guiel_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Guiel | \n", "0.393333 | \n", "0.286667 | \n", "0.313333 | \n", "3 | \n", "
| 4 | \n", "jpg | \n", "Aaron_Patterson_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Patterson | \n", "0.393333 | \n", "0.286667 | \n", "0.273333 | \n", "4 | \n", "
| 5 | \n", "jpg | \n", "Aaron_Peirsol_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Peirsol | \n", "0.393333 | \n", "0.286667 | \n", "0.313333 | \n", "5 | \n", "
| 6 | \n", "jpg | \n", "Aaron_Peirsol_0002 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Peirsol | \n", "0.393333 | \n", "0.286667 | \n", "0.313333 | \n", "5 | \n", "
| 7 | \n", "jpg | \n", "Aaron_Peirsol_0003 | \n", "0.326667 | \n", "250 | \n", "250 | \n", "Aaron_Peirsol | \n", "0.330000 | \n", "0.336667 | \n", "0.320000 | \n", "5 | \n", "
| 8 | \n", "jpg | \n", "Aaron_Peirsol_0004 | \n", "0.330000 | \n", "250 | \n", "250 | \n", "Aaron_Peirsol | \n", "0.330000 | \n", "0.336667 | \n", "0.350000 | \n", "5 | \n", "
| 9 | \n", "jpg | \n", "Aaron_Pena_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Pena | \n", "0.393333 | \n", "0.326667 | \n", "0.273333 | \n", "9 | \n", "
| 10 | \n", "jpg | \n", "Aaron_Sorkin_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Sorkin | \n", "0.393333 | \n", "0.246667 | \n", "0.313333 | \n", "10 | \n", "
| 11 | \n", "jpg | \n", "Aaron_Sorkin_0002 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Aaron_Sorkin | \n", "0.393333 | \n", "0.286667 | \n", "0.313333 | \n", "10 | \n", "
| 12 | \n", "jpg | \n", "Aaron_Tippin_0001 | \n", "0.330000 | \n", "250 | \n", "250 | \n", "Aaron_Tippin | \n", "0.330000 | \n", "0.270000 | \n", "0.350000 | \n", "12 | \n", "
| 13 | \n", "jpg | \n", "Abba_Eban_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Abba_Eban | \n", "0.393333 | \n", "0.286667 | \n", "0.313333 | \n", "13 | \n", "
| 14 | \n", "jpg | \n", "Abbas_Kiarostami_0001 | \n", "0.330000 | \n", "250 | \n", "250 | \n", "Abbas_Kiarostami | \n", "0.330000 | \n", "0.303333 | \n", "0.350000 | \n", "14 | \n", "
| 15 | \n", "jpg | \n", "Abdel_Aziz_Al-Hakim_0001 | \n", "0.330000 | \n", "250 | \n", "250 | \n", "Abdel_Aziz_Al-Hakim | \n", "0.330000 | \n", "0.303333 | \n", "0.350000 | \n", "15 | \n", "
| 16 | \n", "jpg | \n", "Abdel_Aziz_Al-Hakim_0001 | \n", "0.270000 | \n", "250 | \n", "250 | \n", "Abdel_Aziz_Al-Hakim | \n", "0.273333 | \n", "0.673333 | \n", "0.376667 | \n", "15 | \n", "
| 17 | \n", "jpg | \n", "Abdel_Madi_Shabneh_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Abdel_Madi_Shabneh | \n", "0.393333 | \n", "0.246667 | \n", "0.313333 | \n", "16 | \n", "
| 18 | \n", "jpg | \n", "Abdel_Nasser_Assidi_0001 | \n", "0.393333 | \n", "250 | \n", "250 | \n", "Abdel_Nasser_Assidi | \n", "0.393333 | \n", "0.286667 | \n", "0.273333 | \n", "17 | \n", "
| 19 | \n", "jpg | \n", "Abdel_Nasser_Assidi_0001 | \n", "0.190000 | \n", "250 | \n", "250 | \n", "Abdel_Nasser_Assidi | \n", "0.190000 | \n", "0.753333 | \n", "0.446667 | \n", "17 | \n", "