diff options
| author | adamhrv <adam@ahprojects.com> | 2019-02-12 15:18:46 +0100 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2019-02-12 15:18:46 +0100 |
| commit | a5bdab8e798fcdc7885cfdabb0e5dd8076fa1d40 (patch) | |
| tree | 1e7a45a8d2c746994584cc5f8e4ccdabad82f8d8 /megapixels/notebooks/datasets/lfw/lfw_names.ipynb | |
| parent | e95455a8a4013dafdeb7e41cfa8fb1f3ccc28dbb (diff) | |
reorder nbs
Diffstat (limited to 'megapixels/notebooks/datasets/lfw/lfw_names.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/lfw/lfw_names.ipynb | 226 |
1 files changed, 0 insertions, 226 deletions
# %% [markdown]
# # LFW Names
#
# - add gender and format names
#
# Source notebook: megapixels/notebooks/datasets/lfw/lfw_names.ipynb
# Kernel recorded in the notebook metadata: Python 3.6.6 (conda env "megapixels").

# %%
# The IPython magics (`%reload_ext autoreload`, `%autoreload 2`,
# `%matplotlib inline`) are written in their function-call form so the cell is
# valid Python syntax. They still require an IPython/Jupyter kernel, which is
# what provides `get_ipython`.
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import os
from os.path import join
import math
from glob import glob
from random import randint

import numpy as np
import pandas as pd
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt

import sys
sys.path.append('/work/megapixels_dev/megapixels/')
from app.utils import file_utils

# %% [markdown]
# ## Configuration

# %%
DATA_STORE = '/data_store_nas/'
dir_dataset = join(DATA_STORE, 'datasets/people/lfw')
fp_names = join(dir_dataset, 'lfw_names.csv')
fp_male = join(dir_dataset, 'male_names.txt')
fp_female = join(dir_dataset, 'female_names.txt')
fp_gendered = join(dir_dataset, 'lfw_names_gendered.csv')

# Number of trailing characters dropped from every entry of the gender lists
# to turn it into a CSV-style name.
# NOTE(review): presumably this strips a "_NNNN.jpg" filename suffix (9 chars);
# confirm against the contents of male_names.txt / female_names.txt.
FILENAME_SUFFIX_LEN = 9

# Manual labels for names that appear in neither gender list.
GENDER_OVERRIDES = {'Tara Kirk': 'f'}  # unlabeled item

# Label given to a name found in no list and with no manual override.
GENDER_UNKNOWN = 'x'

# %% [markdown]
# ## Load names and gender lists

# %%
def filename_to_name(text):
    """Convert one gender-list entry to the name format used in the names CSV.

    Underscores become spaces and the last ``FILENAME_SUFFIX_LEN`` characters
    are dropped.
    """
    return text.replace('_', ' ')[:-FILENAME_SUFFIX_LEN]


# load names: one record (dict) per CSV row, keyed by row index
df_names = pd.read_csv(fp_names)
names = df_names.to_dict('index')

# load gender lists and convert filenames to csv names
# NOTE(review): assumes file_utils.load_text returns an iterable of strings,
# one per entry - verify against app.utils.file_utils.
names_male = [filename_to_name(t) for t in file_utils.load_text(fp_male)]
names_female = [filename_to_name(t) for t in file_utils.load_text(fp_female)]

# Sets give constant-time membership tests in the labelling loop below; the
# lists are kept for the ordered preview.
names_male_set = set(names_male)
names_female_set = set(names_female)

# check names
print(names_male[:2])
print(names_female[:2])

# %%
# show the first record before labelling (prints nothing if there are none)
for record in list(names.values())[:1]:
    print(record)

# %% [markdown]
# ## Add gender to every name record

# %%
def lookup_gender(name, male, female, overrides):
    """Return the gender label for ``name``.

    The male collection is checked first, then the female collection, then the
    manual ``overrides`` mapping; ``GENDER_UNKNOWN`` is returned when none match.
    """
    if name in male:
        return 'm'
    if name in female:
        return 'f'
    return overrides.get(name, GENDER_UNKNOWN)


# add gender to name item dict (re-running this cell yields the same labels)
for record in names.values():
    record['gender'] = lookup_gender(
        record['name'], names_male_set, names_female_set, GENDER_OVERRIDES)

# %%
# show the first record after labelling
names_list = list(names.values())
for record in names_list[:1]:
    print(record)

# %% [markdown]
# ## Save

# %%
# save to csv
df_names_gendered = pd.DataFrame.from_dict(list(names.values()))  # ignore the indices
df_names_gendered.to_csv(fp_gendered, index=False)

# %%
# Read back the first rows of the file just written. This replaces a
# commented-out `%cat` line that referenced the undefined name
# `fp_names_gendered`.
pd.read_csv(fp_gendered, nrows=2)

# %% [markdown]
# ## Sanity checks

# %%
records = list(names.values())
female = [r for r in records if r['gender'] == 'f']
male = [r for r in records if r['gender'] == 'm']
unlabeled = [r for r in records if r['gender'] not in ('f', 'm')]

# Print the unlabeled records on their own line. Passing `print(unlabeled)` as
# an argument to another print() prints the list first and then a stray `None`.
print(unlabeled)
print(len(male), len(female), len(female) + len(male))

# the three groups must account for every record exactly once
assert len(male) + len(female) + len(unlabeled) == len(names)

# %%
print(len(names))
