summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/identity/identity_testing.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/identity/identity_testing.ipynb')
-rw-r--r--megapixels/notebooks/datasets/identity/identity_testing.ipynb665
1 files changed, 665 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/identity/identity_testing.ipynb b/megapixels/notebooks/datasets/identity/identity_testing.ipynb
new file mode 100644
index 00000000..384cca93
--- /dev/null
+++ b/megapixels/notebooks/datasets/identity/identity_testing.ipynb
@@ -0,0 +1,665 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Knowledge Graph Identities\n",
+ "\n",
+ "- convert filename-style names (e.g. `Kim_Clijsters`) to query names (e.g. `Kim Clijsters`)\n",
+ "- fetch Google Knowledge Graph entity IDs for each name\n",
+ "- save KG IDs to CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 155,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import os\n",
+ "import os.path as osp\n",
+ "from os.path import join\n",
+ "from glob import glob\n",
+ "from pathlib import Path\n",
+ "import random\n",
+ "import math\n",
+ "from datetime import datetime\n",
+ "import requests\n",
+ "import json\n",
+ "import time\n",
+ "from pprint import pprint\n",
+ "from multiprocessing.pool import ThreadPool\n",
+ "import threading\n",
+ "import urllib.request\n",
+ "import difflib\n",
+ "import unidecode\n",
+ "import slugify\n",
+ "\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "import pandas as pd\n",
+ "from scipy.io import loadmat\n",
+ "import numpy as np\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import sys\n",
+ "sys.path.append('/work/megapixels_dev/megapixels')\n",
+ "from app.utils import api_utils, identity_utils\n",
+ "from app.settings import app_cfg\n",
+ "from app.settings import types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "/media/adam/ah8tb/work/megapixels_dev/env/google_knowledge_graph_api.env\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(app_cfg.FP_KNOWLEDGE_GRAPH_ENV)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Get List of Names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 160,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Kim Clijsters', 'William Rosenberg', 'John Brady', 'Juan Ignacio Chela', 'Floyd Keith', 'Sam Gerald', 'Imad Khadduri', 'Anna Kournikova', 'Jacques Rogge', 'Wilbert Elki Meza Majino']\n",
+ "['Kim_Clijsters', 'William_Rosenberg', 'John_Brady', 'Juan_Ignacio_Chela', 'Floyd_Keith', 'Sam_Gerald', 'Imad_Khadduri', 'Anna_Kournikova', 'Jacques_Rogge', 'Wilbert_Elki_Meza_Majino']\n"
+ ]
+ }
+ ],
+ "source": [
+ "names = identity_utils.get_names(types.Dataset.LFW)\n",
+ "print(names['names_query'][0:10])\n",
+ "print(names['names_orig'][0:10])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Google Knowledge Graph API\n",
+ "\n",
+ "- quota: about 100,000 requests per 24 hours"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the Google Knowledge Graph API key from the env file configured in app_cfg.\n",
+ "# The context manager closes the file handle; strip() drops surrounding whitespace\n",
+ "# (e.g. a trailing newline) that would otherwise become part of the key.\n",
+ "with open(app_cfg.FP_KNOWLEDGE_GRAPH_ENV) as fp_env:\n",
+ "  api_key = fp_env.read().strip()\n",
+ "\n",
+ "# API clients for the Knowledge Graph and Wikipedia lookups\n",
+ "kg_api = api_utils.GoogleKnowledgeGraph(api_key)\n",
+ "wp_api = api_utils.WikipediaAPI()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Test API Access"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "wp\n",
+ "https://en.wikipedia.org/w/api.php?redirects=&ppprop=displaytitle&prop=pageprops%7Cpageimages%7Cdescription&generator=prefixsearch&action=query&format=json&piprop=thumbnail&pilimit=1&gpssearch=Vicente+Fox&gpsnamespace=0&gpslimit=1\n",
+ "{'wp_accessed': True,\n",
+ " 'wp_description': 'President of Mexico',\n",
+ " 'wp_name': 'Vicente Fox',\n",
+ " 'wp_page_id': '32836'}\n",
+ "kg\n",
+ "{'kg_accessed': True,\n",
+ " 'kg_bio': 'Vicente Fox Quesada, RSerafO is a Mexican businessman and '\n",
+ " 'politician who served as the 55th President of Mexico from 1 '\n",
+ " 'December 2000 to 30 November 2006.\\n',\n",
+ " 'kg_bio_url': 'https://en.wikipedia.org/wiki/Vicente_Fox',\n",
+ " 'kg_description': 'Former President of Mexico',\n",
+ " 'kg_error': '',\n",
+ " 'kg_id': '/m/081f4',\n",
+ " 'kg_image_url': 'http://t2.gstatic.com/images?q=tbn:ANd9GcQqs1Z0NhSLve9OyfdC0AHFWKWlTpHO4tCnU7dedSSz2kzCRk60',\n",
+ " 'kg_name': 'Vicente Fox',\n",
+ " 'kg_score': 610.987427,\n",
+ " 'kg_url': '',\n",
+ " 'query': 'Vicente Fox'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('wp----')\n",
+ "pprint(wp_api.get_meta({'query': 'Vicente Fox'}, verbose=True))\n",
+ "print('kg----')\n",
+ "pprint(kg_api.get_kg_from_name({'query':'Vicente Fox'}))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Test Name Similarity Matching"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 162,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.7714285714285716\n"
+ ]
+ }
+ ],
+ "source": [
+ "#print(identity_utils.names_match('Andréss Iniestas', 'Andres Iniestalossas Jr.', as_float=True))\n",
+ "#print(identity_utils.names_match('Adoor Gopalakrishnan', 'Adoors Gopalakarishnan', as_float=True))\n",
+ "#print(identity_utils.names_match('Dave Letterman', 'David Letterman', as_float=True))\n",
+ "print(identity_utils.names_match('Charles Dickens', 'Charles Boooker', as_float=True))\n",
+ "#print(identity_utils.names_match('Donald Trump', 'Donald J. Trump', as_float=True))\n",
+ "#print(identity_utils.names_match('Wang Fei', 'Fei Wang III', as_float=True))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# define thread mapping function\n",
+ "def pool_map_persons(obj):\n",
+ "  \"\"\"Look up one person record in the Knowledge Graph and Wikipedia APIs.\n",
+ "\n",
+ "  Intended as the ThreadPool.map() worker; ticks the module-level `pbar` once per record.\n",
+ "\n",
+ "  :param obj: dict for one person\n",
+ "  :returns: the KG result merged with the Wikipedia result (Wikipedia keys win on collision),\n",
+ "    or `obj` unchanged when both lookups are already flagged as accessed\n",
+ "  \"\"\"\n",
+ "  if obj.get('kg_accessed', False) and obj.get('wp_accessed', False):\n",
+ "    # already complete from an earlier pass: do not spend API quota on it again\n",
+ "    person_obj = obj\n",
+ "  else:\n",
+ "    kg_obj = kg_api.get_kg_from_name(obj)\n",
+ "    wp_obj = wp_api.get_meta(obj)\n",
+ "    person_obj = {**kg_obj, **wp_obj}\n",
+ "  # tick after the work so the bar reflects finished records; reading the\n",
+ "  # module-level `pbar` needs no `global` statement because it is never rebound here\n",
+ "  pbar.update(1)\n",
+ "  return person_obj\n",
+ "\n",
+ "def num_non_accessed(mps):\n",
+ "  \"\"\"Count the records in `mps` not flagged as accessed by both the KG and Wikipedia lookups.\"\"\"\n",
+ "  return sum(0 if (x.get('kg_accessed', False) and x.get('wp_accessed', False)) else 1 for x in mps)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load existing CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# load existing CSV\n",
+ "fp_csv = '/data_store_hdd/datasets/people/lfw/metadata/identity_kg.csv'\n",
+ "# read, index by the 'index' column, and replace nulls with empty strings in one chain\n",
+ "df = pd.read_csv(fp_csv, encoding='utf-16').set_index('index').fillna('')\n",
+ "mapped_persons = df.to_dict('records')\n",
+ "# add the (empty) error columns to every record\n",
+ "for mp in mapped_persons:\n",
+ "  mp.update(wp_error='', kg_error='')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Get Knowledge Graph and Wikipedia Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5507f5c19de746df94aa5445e3c7cf46",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=5749), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "832/5749 remaining\n",
+ "832/5749 remaining. Using 5 threads\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "411d08f873174d13a1de1f8b21f9f993",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=5749), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done. 0 remaining.\n"
+ ]
+ }
+ ],
+ "source": [
+ "num_threads_max = 5\n",
+ "sleep_min = 1\n",
+ "\n",
+ "nna = num_non_accessed(mapped_persons)\n",
+ "print(f'{nna}/{len(mapped_persons)} remaining')\n",
+ "\n",
+ "# retry in passes until every record has both lookups flagged as accessed\n",
+ "while nna > 0:\n",
+ "  num_threads = max(1, min(num_threads_max, nna))\n",
+ "  print(f'{nna}/{len(mapped_persons)} remaining. Using {num_threads} threads')\n",
+ "\n",
+ "  # `pbar` must be a module-level name: pool_map_persons() ticks it from the worker threads.\n",
+ "  # Both context managers clean up on exit, so no pool or progress bar is leaked per pass.\n",
+ "  with ThreadPool(num_threads) as pool, tqdm(total=len(mapped_persons)) as pbar:\n",
+ "    mapped_persons = pool.map(pool_map_persons, mapped_persons)\n",
+ "\n",
+ "  nna = num_non_accessed(mapped_persons)\n",
+ "  if nna > 0:\n",
+ "    # back off before the next pass\n",
+ "    print(f'{nna} remaining. Sleeping for {sleep_min} minutes...')\n",
+ "    time.sleep(60 * sleep_min)\n",
+ "\n",
+ "print(f'Done. {nna} remaining.')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Match Knowledge Graph and Wikipedia results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 220,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "# Classify every record by whether the KG and Wikipedia names match the source name.\n",
+ "# matched: 2 = both match and KG score > 100, 1 = both match, 0 = needs review, -1 = no match\n",
+ "#\n",
+ "# NOTE(review): same_person() is neither defined nor imported in this notebook, so this cell\n",
+ "# raises NameError on a fresh kernel. It is presumably a name-similarity helper such as\n",
+ "# identity_utils.names_match - confirm and import it explicitly.\n",
+ "for mp in mapped_persons:\n",
+ "  kg_name = mp.get('kg_name')\n",
+ "  wp_name = mp.get('wp_name')\n",
+ "  name_orig = mp.get('source_name')\n",
+ "  # kg_score can be '' after the CSV null-fill; treat any empty value as 0\n",
+ "  kg_score = int(mp.get('kg_score') or 0)\n",
+ "\n",
+ "  kg_matches = same_person(name_orig, kg_name)\n",
+ "  wp_matches = same_person(name_orig, wp_name)\n",
+ "\n",
+ "  if kg_matches and wp_matches:\n",
+ "    # both sources agree; a high KG score upgrades the match to a supermatch\n",
+ "    match_status = 2 if kg_score > 100 else 1\n",
+ "    # default to using wp because descriptions are more appropriate/updated\n",
+ "    source = 'wp'\n",
+ "  elif kg_matches:\n",
+ "    # KG matched but wp failed, needs review\n",
+ "    source = 'kg'\n",
+ "    match_status = 0\n",
+ "  elif wp_matches:\n",
+ "    # only the wikipedia name matched, needs review\n",
+ "    source = 'wp'\n",
+ "    match_status = 0\n",
+ "  else:\n",
+ "    # no information available\n",
+ "    match_status = -1\n",
+ "    source = None\n",
+ "\n",
+ "  slug = slugify.slugify(name_orig, separator='_')\n",
+ "  mp_bio = mp.get('kg_bio', '')\n",
+ "  wp_desc = mp.get('wp_description', '')\n",
+ "  source_url = f\"http://vis-www.cs.umass.edu/lfw/person/{name_orig.replace(' ', '_')}.html\"\n",
+ "\n",
+ "  # start empty so a record without a usable source never inherits the\n",
+ "  # name/description left over from the previous iteration\n",
+ "  mp_name = ''\n",
+ "  mp_description = ''\n",
+ "  if source == 'kg':\n",
+ "    # google knowledge graph\n",
+ "    mp_name = mp['kg_name']\n",
+ "    mp_description = mp.get('kg_description', '')\n",
+ "  elif source == 'wp':\n",
+ "    # wikipedia\n",
+ "    mp_name = mp['wp_name']\n",
+ "    mp_description = mp.get('wp_description', '')\n",
+ "\n",
+ "  # NOTE(review): applied to every record regardless of source - confirm this nesting\n",
+ "  if 'disambiguation' in wp_desc.lower():\n",
+ "    # needs review if \"disambiguation\" appears in the wikipedia description\n",
+ "    match_status = 0\n",
+ "    mp_name = ''\n",
+ "    mp_description = ''\n",
+ "    mp_bio = ''\n",
+ "\n",
+ "  mp['source_url'] = source_url\n",
+ "  mp['mp_slug'] = slug\n",
+ "  mp['matched'] = match_status\n",
+ "  mp['mp_bio'] = mp_bio\n",
+ "  mp['mp_name'] = mp_name\n",
+ "  mp['mp_description'] = mp_description"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 221,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "match: 4359\n",
+ "review: 718\n",
+ "fail: 672\n",
+ "no kg accessed: 0\n",
+ "no wp accessed: 0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# tally records per match status, then per missing lookup flag\n",
+ "statuses = [x.get('matched') for x in mapped_persons]\n",
+ "print(f\"match: {sum(1 for s in statuses if s > 0)}\")\n",
+ "print(f\"review: {statuses.count(0)}\")\n",
+ "print(f\"fail: {statuses.count(-1)}\")\n",
+ "\n",
+ "print(f\"no kg accessed: {sum(1 for x in mapped_persons if not x.get('kg_accessed', False))}\")\n",
+ "print(f\"no wp accessed: {sum(1 for x in mapped_persons if not x.get('wp_accessed', False))}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Save data to CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 235,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# create dataframe for mapped persons\n",
+ "df_mapped_persons = pd.DataFrame.from_dict(mapped_persons)\n",
+ "df_mapped_persons.index.name = 'index'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 236,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# save\n",
+ "fp_out = '/data_store_hdd/datasets/people/lfw/metadata/identity_kg.csv'\n",
+ "# Bookkeeping columns are not persisted. Reassigning (instead of inplace=True) with\n",
+ "# errors='ignore' keeps this cell re-runnable after the columns are already gone.\n",
+ "cols_transient = ['kg_accessed', 'wp_accessed', 'kg_error', 'wp_error']\n",
+ "df_mapped_persons = df_mapped_persons.drop(cols_transient, axis=1, errors='ignore')\n",
+ "df_mapped_persons.to_csv(fp_out, encoding='utf-16')\n",
+ "\n",
+ "# create small version: the first `limit` rows of the same frame,\n",
+ "# so the full and the small CSV share one schema\n",
+ "limit = 1000\n",
+ "fpp_out = Path(fp_out)\n",
+ "fp_out_sm = join(fpp_out.parent, f'{fpp_out.stem}_0_{limit}.csv')\n",
+ "df_mapped_persons_sm = df_mapped_persons.iloc[0:limit]\n",
+ "df_mapped_persons_sm.to_csv(fp_out_sm, encoding='utf-16')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 237,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>kg_bio</th>\n",
+ " <th>kg_bio_url</th>\n",
+ " <th>kg_description</th>\n",
+ " <th>kg_id</th>\n",
+ " <th>kg_image_url</th>\n",
+ " <th>kg_name</th>\n",
+ " <th>kg_score</th>\n",
+ " <th>kg_url</th>\n",
+ " <th>matched</th>\n",
+ " <th>mp_bio</th>\n",
+ " <th>mp_description</th>\n",
+ " <th>mp_name</th>\n",
+ " <th>mp_slug</th>\n",
+ " <th>query</th>\n",
+ " <th>source</th>\n",
+ " <th>source_name</th>\n",
+ " <th>source_url</th>\n",
+ " <th>wp_description</th>\n",
+ " <th>wp_name</th>\n",
+ " <th>wp_page_id</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>index</th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " <th></th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>Kim Antonie Lode Clijsters is a Belgian former...</td>\n",
+ " <td>https://en.wikipedia.org/wiki/Kim_Clijsters</td>\n",
+ " <td>Belgian tennis player</td>\n",
+ " <td>/m/01m_gh</td>\n",
+ " <td>http://t3.gstatic.com/images?q=tbn:ANd9GcQ4yRK...</td>\n",
+ " <td>Kim Clijsters</td>\n",
+ " <td>618.272705</td>\n",
+ " <td></td>\n",
+ " <td>2</td>\n",
+ " <td>Kim Antonie Lode Clijsters is a Belgian former...</td>\n",
+ " <td>Belgian tennis player</td>\n",
+ " <td>Kim Clijsters</td>\n",
+ " <td>kim_clijsters</td>\n",
+ " <td>Kim Clijsters</td>\n",
+ " <td>lfw</td>\n",
+ " <td>Kim_Clijsters</td>\n",
+ " <td>http://vis-www.cs.umass.edu/lfw/person/Kim_Cli...</td>\n",
+ " <td>Belgian tennis player</td>\n",
+ " <td>Kim Clijsters</td>\n",
+ " <td>262793</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>William Rosenberg was an American entrepreneur...</td>\n",
+ " <td>https://en.wikipedia.org/wiki/William_Rosenberg</td>\n",
+ " <td>American entrepreneur</td>\n",
+ " <td>/m/07dy4z</td>\n",
+ " <td></td>\n",
+ " <td>William Rosenberg</td>\n",
+ " <td>367.879730</td>\n",
+ " <td></td>\n",
+ " <td>2</td>\n",
+ " <td>William Rosenberg was an American entrepreneur...</td>\n",
+ " <td>American businessman</td>\n",
+ " <td>William Rosenberg</td>\n",
+ " <td>william_rosenberg</td>\n",
+ " <td>William Rosenberg</td>\n",
+ " <td>lfw</td>\n",
+ " <td>William_Rosenberg</td>\n",
+ " <td>http://vis-www.cs.umass.edu/lfw/person/William...</td>\n",
+ " <td>American businessman</td>\n",
+ " <td>William Rosenberg</td>\n",
+ " <td>2.44981e+06</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " kg_bio \\\n",
+ "index \n",
+ "0 Kim Antonie Lode Clijsters is a Belgian former... \n",
+ "1 William Rosenberg was an American entrepreneur... \n",
+ "\n",
+ " kg_bio_url kg_description \\\n",
+ "index \n",
+ "0 https://en.wikipedia.org/wiki/Kim_Clijsters Belgian tennis player \n",
+ "1 https://en.wikipedia.org/wiki/William_Rosenberg American entrepreneur \n",
+ "\n",
+ " kg_id kg_image_url \\\n",
+ "index \n",
+ "0 /m/01m_gh http://t3.gstatic.com/images?q=tbn:ANd9GcQ4yRK... \n",
+ "1 /m/07dy4z \n",
+ "\n",
+ " kg_name kg_score kg_url matched \\\n",
+ "index \n",
+ "0 Kim Clijsters 618.272705 2 \n",
+ "1 William Rosenberg 367.879730 2 \n",
+ "\n",
+ " mp_bio \\\n",
+ "index \n",
+ "0 Kim Antonie Lode Clijsters is a Belgian former... \n",
+ "1 William Rosenberg was an American entrepreneur... \n",
+ "\n",
+ " mp_description mp_name mp_slug \\\n",
+ "index \n",
+ "0 Belgian tennis player Kim Clijsters kim_clijsters \n",
+ "1 American businessman William Rosenberg william_rosenberg \n",
+ "\n",
+ " query source source_name \\\n",
+ "index \n",
+ "0 Kim Clijsters lfw Kim_Clijsters \n",
+ "1 William Rosenberg lfw William_Rosenberg \n",
+ "\n",
+ " source_url \\\n",
+ "index \n",
+ "0 http://vis-www.cs.umass.edu/lfw/person/Kim_Cli... \n",
+ "1 http://vis-www.cs.umass.edu/lfw/person/William... \n",
+ "\n",
+ " wp_description wp_name wp_page_id \n",
+ "index \n",
+ "0 Belgian tennis player Kim Clijsters 262793 \n",
+ "1 American businessman William Rosenberg 2.44981e+06 "
+ ]
+ },
+ "execution_count": 237,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_mapped_persons.head(2)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}