summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/visualizations/age_gender.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/visualizations/age_gender.ipynb')
-rw-r--r--megapixels/notebooks/datasets/visualizations/age_gender.ipynb137
1 files changed, 137 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/visualizations/age_gender.ipynb b/megapixels/notebooks/datasets/visualizations/age_gender.ipynb
new file mode 100644
index 00000000..8d64ecfe
--- /dev/null
+++ b/megapixels/notebooks/datasets/visualizations/age_gender.ipynb
@@ -0,0 +1,137 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Age and Gender Face Counts to CSV for Site"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import os\n",
+ "from os.path import join\n",
+ "import math\n",
+ "import time\n",
+ "from glob import glob\n",
+ "import datetime\n",
+ "from collections import OrderedDict\n",
+ "from operator import itemgetter\n",
+ "from datetime import datetime\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "import imutils\n",
+ "import random\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import random\n",
+ "\n",
+ "import sys\n",
+ "sys.path.append('/work/megapixels_dev/megapixels')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dataset key: selects both the input metadata folder and the site asset folder.\n",
+ "dk = 'helen'\n",
+ "# Derive the input path from dk as well, so that switching datasets cannot\n",
+ "# silently keep reading the helen attributes while writing another dataset's assets.\n",
+ "# NOTE(review): assumes the data store folder is named after the same key - confirm.\n",
+ "fp_in = f'/data_store_hdd/datasets/people/{dk}/metadata/face_attributes.csv'\n",
+ "fp_out_age = f'/work/megapixels_dev/site/content/pages/datasets/{dk}/assets/age.csv'\n",
+ "fp_out_gender = f'/work/megapixels_dev/site/content/pages/datasets/{dk}/assets/gender.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(fp_in)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Age\n",
+ "# Count faces per age bracket and write the histogram to fp_out_age.\n",
+ "# Brackets are inclusive whole-year ranges and must not overlap: the former\n",
+ "# (64, 75) bracket counted ages 64 and 75 twice.\n",
+ "brackets = [(0, 12), (13, 18), (19, 24), (25, 34), (35, 44), (45, 54), (55, 64), (65, 74), (75, 100)]\n",
+ "df_age = df['age_real']\n",
+ "\n",
+ "results = []\n",
+ "for a1, a2 in brackets:\n",
+ "    # Half-open test [a1, a2 + 1): identical to a1 <= age <= a2 for whole-number\n",
+ "    # ages, but fractional ages such as 12.5 are no longer lost between brackets.\n",
+ "    n = int(((df_age >= a1) & (df_age < a2 + 1)).sum())\n",
+ "    results.append({'age': f'{a1} - {a2}', 'faces': n})\n",
+ "\n",
+ "# Pass the column list explicitly so the CSV header order is always age, faces.\n",
+ "df_out = pd.DataFrame(results, columns=['age', 'faces'])\n",
+ "df_out.to_csv(fp_out_age, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Gender\n",
+ "# Bucket every row by its 'f' score using the 0.33 / 0.66 cut-offs and write\n",
+ "# the three counts to fp_out_gender.\n",
+ "df_f = df['f']\n",
+ "is_low = df_f < 0.33\n",
+ "is_mid = (df_f >= 0.33) & (df_f <= 0.66)\n",
+ "is_high = df_f > 0.66\n",
+ "\n",
+ "nm, nnb, nf = (len(df_f[mask]) for mask in (is_low, is_mid, is_high))\n",
+ "\n",
+ "# Row order (male, female, they) is the order written to the CSV.\n",
+ "results = [\n",
+ "    {'gender': label, 'faces': count}\n",
+ "    for label, count in (('male', nm), ('female', nf), ('they', nnb))\n",
+ "]\n",
+ "\n",
+ "df_out = pd.DataFrame(results, columns=['gender', 'faces'])\n",
+ "df_out.to_csv(fp_out_gender, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}