diff options
Diffstat (limited to 'megapixels/notebooks/datasets/megaface')
| -rw-r--r-- | megapixels/notebooks/datasets/megaface/megapixels_age_nyt.ipynb | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/megaface/megapixels_age_nyt.ipynb b/megapixels/notebooks/datasets/megaface/megapixels_age_nyt.ipynb new file mode 100644 index 00000000..b49bcc42 --- /dev/null +++ b/megapixels/notebooks/datasets/megaface/megapixels_age_nyt.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os.path import join\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from PIL import Image\n", + "import cv2 as cv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "fp_attr = '/data_store_hdd/datasets/people/megaface/metadata/face_attributes.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(fp_attr)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>age_apparent</th>\n", + " <th>age_real</th>\n", + " <th>f</th>\n", + " <th>m</th>\n", + " <th>roi_index</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>11.05</td>\n", + " <td>18.63</td>\n", + " <td>0.8155</td>\n", + " <td>0.1845</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>28.59</td>\n", + " <td>26.64</td>\n", + " <td>0.0219</td>\n", + " <td>0.9781</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>12.09</td>\n", + " <td>19.08</td>\n", + " <td>0.6808</td>\n", + " <td>0.3192</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>39.36</td>\n", + " <td>51.36</td>\n", + " <td>0.9943</td>\n", + " <td>0.0057</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>41.84</td>\n", + " <td>52.25</td>\n", + " <td>0.8226</td>\n", + " <td>0.1774</td>\n", + " <td>4</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index age_apparent age_real f m roi_index\n", + "0 0 11.05 18.63 0.8155 0.1845 0\n", + "1 1 28.59 26.64 0.0219 0.9781 1\n", + "2 2 12.09 19.08 0.6808 0.3192 2\n", + "3 3 39.36 51.36 0.9943 0.0057 3\n", + "4 4 41.84 52.25 0.8226 0.1774 4" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Less than 21yr: 311,973 (apparent), 168,619 (real)\n", + "Less than 18yr: 175,628 (apparent), 53,602 (real)\n", + "Less than 12yr: 35,235 (apparent), 773 (real)\n", + "Less than 8yr: 1,488 (apparent), 0 (real)\n" + ] + } + ], + "source": [ + "brackets = [21, 18, 12, 8]\n", + "for b in brackets:\n", + " age_ap = len(df[df['age_apparent'] < b])\n", + " age_real = len(df[df['age_real'] < b])\n", + " print(f\"Less than {b}yr: {age_ap:,} (apparent), {age_real:,} (real)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1559780" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "megapixels", + "language": "python", + "name": "megapixels" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} |
