{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# IMDB WIKI: Convert .mat to CSVs" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import os.path as osp\n", "from os.path import join\n", "from glob import glob\n", "import random\n", "import math\n", "from datetime import datetime\n", "\n", "import cv2 as cv\n", "import pandas as pd\n", "from scipy.io import loadmat\n", "import numpy as np\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "from tqdm import tqdm_notebook as tqdm\n", "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Metadata" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "fp_mat = '/data_store_hdd/datasets/people/imdb_wiki/downloads/imdb.mat'\n", "dir_out = '/data_store_hdd/datasets/people/imdb_wiki/metadata/'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "mat_data = loadmat(fp_mat)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# row 3\n", "def load_parse_imdb_mat(mat):\n", " metadata = mat['imdb'][0][0]\n", " results = []\n", " num_records = len(metadata[0][0])\n", " print(f'loaded: {num_records} records')\n", " for i in tqdm(range(num_records), total=num_records):\n", " dob_matlab = metadata[0][0][i]\n", " dob = datetime.fromordinal(dob_matlab)\n", " dob_str = f'{dob.year}-{dob.month}-{dob.day}'\n", " year_photo = metadata[1][0][i]\n", " fp = metadata[2][0][i][0]\n", " gender_val = metadata[3][0][i]\n", " if gender_val == 0:\n", " gender = 'f'\n", " elif gender_val == 1:\n", " gender = 'm'\n", " else:\n", " gender = None\n", " name = metadata[4][0][i][0]\n", " roi = metadata[5][0][i][0]\n", " face_conf = metadata[6][0][i]\n", " face_conf_second = metadata[7][0][i]\n", " celeb_id = metadata[9][0][i]\n", " result = {\n", " 'dob': dob_str,\n", " 'year_photo': year_photo,\n", " 'filepath': fp,\n", " 'gender': gender,\n", " 'name': name,\n", " 'x1': roi[0],\n", " 'y1': roi[1],\n", " 'x2': roi[2],\n", " 'y2': roi[3],\n", " 'celeb_id': celeb_id\n", " }\n", " results.append(result)\n", " return results" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "loaded: 460723 records\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8a4a106e3bee4fde89492ceef50b9c05", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=460723), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "results_meta = load_parse_imdb_mat(mat_data)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "df_meta = pd.DataFrame.from_dict(results_meta)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | celeb_id | \n", "dob | \n", "filepath | \n", "gender | \n", "name | \n", "x1 | \n", "x2 | \n", "y1 | \n", "y2 | \n", "year_photo | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "6488 | \n", "1900-5-11 | \n", "01/nm0000001_rm124825600_1899-5-10_1968.jpg | \n", "m | \n", "Fred Astaire | \n", "1072.926000 | \n", "1214.784000 | \n", "161.838000 | \n", "303.696000 | \n", "1968 | \n", "
| 1 | \n", "6488 | \n", "1900-5-11 | \n", "01/nm0000001_rm3343756032_1899-5-10_1970.jpg | \n", "m | \n", "Fred Astaire | \n", "477.184000 | \n", "622.592000 | \n", "100.352000 | \n", "245.760000 | \n", "1970 | \n", "
| 2 | \n", "6488 | \n", "1900-5-11 | \n", "01/nm0000001_rm577153792_1899-5-10_1968.jpg | \n", "m | \n", "Fred Astaire | \n", "114.969643 | \n", "451.686572 | \n", "114.969643 | \n", "451.686572 | \n", "1968 | \n", "
| 3 | \n", "6488 | \n", "1900-5-11 | \n", "01/nm0000001_rm946909184_1899-5-10_1968.jpg | \n", "m | \n", "Fred Astaire | \n", "622.885506 | \n", "844.339008 | \n", "424.217504 | \n", "645.671006 | \n", "1968 | \n", "
| 4 | \n", "6488 | \n", "1900-5-11 | \n", "01/nm0000001_rm980463616_1899-5-10_1968.jpg | \n", "m | \n", "Fred Astaire | \n", "1013.859002 | \n", "1201.586128 | \n", "233.882042 | \n", "421.609168 | \n", "1968 | \n", "