summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/lfw/lfw_names.ipynb
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2019-02-12 15:18:46 +0100
committeradamhrv <adam@ahprojects.com>2019-02-12 15:18:46 +0100
commita5bdab8e798fcdc7885cfdabb0e5dd8076fa1d40 (patch)
tree1e7a45a8d2c746994584cc5f8e4ccdabad82f8d8 /megapixels/notebooks/datasets/lfw/lfw_names.ipynb
parente95455a8a4013dafdeb7e41cfa8fb1f3ccc28dbb (diff)
reorder nbs
Diffstat (limited to 'megapixels/notebooks/datasets/lfw/lfw_names.ipynb')
-rw-r--r--megapixels/notebooks/datasets/lfw/lfw_names.ipynb226
1 files changed, 0 insertions, 226 deletions
diff --git a/megapixels/notebooks/datasets/lfw/lfw_names.ipynb b/megapixels/notebooks/datasets/lfw/lfw_names.ipynb
deleted file mode 100644
index 8c474dd7..00000000
--- a/megapixels/notebooks/datasets/lfw/lfw_names.ipynb
+++ /dev/null
@@ -1,226 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# LFW Names\n",
- "\n",
- "- add gender and format names"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "%reload_ext autoreload\n",
- "%autoreload 2\n",
- "\n",
- "import os\n",
- "from os.path import join\n",
- "import math\n",
- "from glob import glob\n",
- "from random import randint\n",
- "\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "import sys\n",
- "sys.path.append('/work/megapixels_dev/megapixels/')\n",
- "from app.utils import file_utils"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "DATA_STORE = '/data_store_nas/'  # base directory; every path below is joined onto it\n",
- "dir_dataset = join(DATA_STORE, 'datasets/people/lfw')\n",
- "# input files inside the dataset directory: the name table and the two gender lists\n",
- "fp_names, fp_male, fp_female = [join(dir_dataset, fn) for fn in\n",
- "    ('lfw_names.csv', 'male_names.txt', 'female_names.txt')]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['Alfred Ford', 'Craig Fitzgibbon']\n",
- "['Claudia Coslovich', 'Allison Searing']\n"
- ]
- }
- ],
- "source": [
- "# load the name table as {row_index: {column: value}}\n",
- "df_names = pd.read_csv(fp_names)\n",
- "names = df_names.to_dict(orient='index')\n",
- "# each gender-list entry has its underscores turned into spaces and its last\n",
- "# 9 characters cut off (presumably a filename suffix - confirm against the lists)\n",
- "def _to_csv_name(entry):\n",
- "    return entry.replace('_', ' ')[:-9]\n",
- "names_male = list(map(_to_csv_name, file_utils.load_text(fp_male)))\n",
- "names_female = list(map(_to_csv_name, file_utils.load_text(fp_female)))\n",
- "# check names\n",
- "print(names_male[:2])\n",
- "print(names_female[:2])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'name': 'AJ Cook', 'images': 1}\n"
- ]
- }
- ],
- "source": [
- "# show only the first record (prints nothing when names is empty)\n",
- "for idx, n in list(names.items())[:1]:\n",
- "    print(n)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [],
- "source": [
- "# add gender to each name record: 'm'/'f' from the lists, 'x' when unmatched\n",
- "for item in names.values():\n",
- "    name = item['name']\n",
- "    if name in names_male:\n",
- "        g = 'm'\n",
- "    elif name in names_female:\n",
- "        g = 'f'\n",
- "    elif name == 'Tara Kirk':\n",
- "        g = 'f'  # unlabeled item\n",
- "    else:\n",
- "        g = 'x'\n",
- "    item['gender'] = g  # item is the dict stored in names, so this updates it in place"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'name': 'AJ Cook', 'images': 1, 'gender': 'f'}\n"
- ]
- }
- ],
- "source": [
- "# collect the records without their row keys and show the first one\n",
- "names_list = [*names.values()]\n",
- "for n in names_list[:1]:\n",
- "    print(n)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [],
- "source": [
- "# write the records, now including the gender field, to a new csv in the dataset dir\n",
- "fp_gendered = join(dir_dataset, 'lfw_names_gendered.csv')\n",
- "df_names_gendered = pd.DataFrame(list(names.values()))  # row keys are dropped\n",
- "df_names_gendered.to_csv(fp_gendered, index=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [],
- "source": [
- "#%cat $fp_gendered | head -n2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[]\n",
- "4277 1472 5749 None\n"
- ]
- }
- ],
- "source": [
- "f = [rec for rec in names.values() if rec['gender'] == 'f']\n",
- "m = [rec for rec in names.values() if rec['gender'] == 'm']\n",
- "x = [rec for rec in names.values() if rec['gender'] not in ('f', 'm')]  # neither 'f' nor 'm'\n",
- "print(len(m), len(f), len(f) + len(m), x)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "5749\n"
- ]
- }
- ],
- "source": [
- "print(len(names))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python [conda env:megapixels]",
- "language": "python",
- "name": "conda-env-megapixels-py"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}