reorder nbs

author: adamhrv <adam@ahprojects.com> 2019-02-12 15:18:46 +0100
committer: adamhrv <adam@ahprojects.com> 2019-02-12 15:18:46 +0100
commit: a5bdab8e798fcdc7885cfdabb0e5dd8076fa1d40 (patch)
tree: 1e7a45a8d2c746994584cc5f8e4ccdabad82f8d8 /megapixels/notebooks/datasets/lfw/lfw_names.ipynb
parent: e95455a8a4013dafdeb7e41cfa8fb1f3ccc28dbb (diff)
1 files changed, 0 insertions, 226 deletions
diff --git a/megapixels/notebooks/datasets/lfw/lfw_names.ipynb b/megapixels/notebooks/datasets/lfw/lfw_names.ipynb
deleted file mode 100644
index 8c474dd7..00000000
--- a/megapixels/notebooks/datasets/lfw/lfw_names.ipynb
+++ /dev/null
@@ -1,226 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# LFW Names\n",
-    "\n",
-    "- add gender and format names"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "\n",
-    "import os\n",
-    "from os.path import join\n",
-    "import math\n",
-    "from glob import glob\n",
-    "from random import randint\n",
-    "\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "%matplotlib inline\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "import sys\n",
-    "sys.path.append('/work/megapixels_dev/megapixels/')\n",
-    "from app.utils import file_utils"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# root of the data store and the LFW dataset directory beneath it\n",
-    "DATA_STORE = '/data_store_nas/'\n",
-    "dir_dataset = join(DATA_STORE, 'datasets/people/lfw')\n",
-    "\n",
-    "# input files: the names csv plus the male and female name lists\n",
-    "fp_names, fp_male, fp_female = [\n",
-    "  join(dir_dataset, fn)\n",
-    "  for fn in ('lfw_names.csv', 'male_names.txt', 'female_names.txt')\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Alfred Ford', 'Craig Fitzgibbon']\n",
-      "['Claudia Coslovich', 'Allison Searing']\n"
-     ]
-    }
-   ],
-   "source": [
-    "# load names: one dict per csv row, keyed by row index\n",
-    "df_names = pd.read_csv(fp_names)\n",
-    "names = df_names.to_dict('index')\n",
-    "\n",
-    "# convert a gender-list entry to the csv spelling: underscores become spaces,\n",
-    "# then the last 9 characters are dropped (presumably the image number and\n",
-    "# file extension of the filename - verify against the list files)\n",
-    "def to_csv_name(entry):\n",
-    "  return entry.replace('_',' ')[:-9]\n",
-    "\n",
-    "# load gender lists and convert filenames to csv names\n",
-    "names_male = [to_csv_name(t) for t in file_utils.load_text(fp_male)]\n",
-    "names_female = [to_csv_name(t) for t in file_utils.load_text(fp_female)]\n",
-    "\n",
-    "# check names\n",
-    "for converted in (names_male, names_female):\n",
-    "  print(converted[:2])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'name': 'AJ Cook', 'images': 1}\n"
-     ]
-    }
-   ],
-   "source": [
-    "# show only the first record as a sanity check\n",
-    "for idx, n in list(names.items())[:1]:\n",
-    "  print(n)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# add gender to name item dict\n",
-    "# build sets once so each membership test is a hash lookup, not a list scan\n",
-    "male_lookup = set(names_male)\n",
-    "female_lookup = set(names_female)\n",
-    "for idx, item in names.items():\n",
-    "  name = item['name']\n",
-    "  if name in male_lookup:\n",
-    "    g = 'm'\n",
-    "  elif name in female_lookup:\n",
-    "    g = 'f'\n",
-    "  elif name == 'Tara Kirk':\n",
-    "    g = 'f'  # unlabeled item\n",
-    "  else:\n",
-    "    g = 'x'  # in neither list; counted separately when the totals are checked\n",
-    "  names[idx]['gender'] = g"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'name': 'AJ Cook', 'images': 1, 'gender': 'f'}\n"
-     ]
-    }
-   ],
-   "source": [
-    "names_list = list(names.values())\n",
-    "# show only the first record, which now carries its gender\n",
-    "for n in names_list[:1]:\n",
-    "  print(n)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# save to csv\n",
-    "fp_gendered = join(dir_dataset, 'lfw_names_gendered.csv')\n",
-    "# the values are a list of row records, which the DataFrame constructor takes\n",
-    "# directly (from_dict is meant for dict input); the dict keys are ignored\n",
-    "df_names_gendered = pd.DataFrame(list(names.values()))\n",
-    "df_names_gendered.to_csv(fp_gendered, index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#%cat $fp_gendered | head -n2"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[]\n",
-      "4277 1472 5749\n"
-     ]
-    }
-   ],
-   "source": [
-    "# split the records by assigned gender; x collects anything not labeled f or m\n",
-    "f = [item for item in names.values() if item['gender'] == 'f']\n",
-    "m = [item for item in names.values() if item['gender'] == 'm']\n",
-    "x = [item for item in names.values() if item['gender'] not in ['f','m']]\n",
-    "# print the unmatched records first, then the male / female / labeled totals\n",
-    "# (a print() nested inside print() would add a stray None to the totals line)\n",
-    "print(x)\n",
-    "print(len(m), len(f), len(f) + len(m))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "5749\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(len(names))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python [conda env:megapixels]",
-   "language": "python",
-   "name": "conda-env-megapixels-py"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
author	adamhrv <adam@ahprojects.com>	2019-02-12 15:18:46 +0100
committer	adamhrv <adam@ahprojects.com>	2019-02-12 15:18:46 +0100
commit	a5bdab8e798fcdc7885cfdabb0e5dd8076fa1d40 (patch)
tree	1e7a45a8d2c746994584cc5f8e4ccdabad82f8d8 /megapixels/notebooks/datasets/lfw/lfw_names.ipynb
parent	e95455a8a4013dafdeb7e41cfa8fb1f3ccc28dbb (diff)