summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/ijb_c/ijb_c_mids.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/ijb_c/ijb_c_mids.ipynb')
-rw-r--r--megapixels/notebooks/datasets/ijb_c/ijb_c_mids.ipynb1281
1 files changed, 1281 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/ijb_c/ijb_c_mids.ipynb b/megapixels/notebooks/datasets/ijb_c/ijb_c_mids.ipynb
new file mode 100644
index 00000000..c078a058
--- /dev/null
+++ b/megapixels/notebooks/datasets/ijb_c/ijb_c_mids.ipynb
@@ -0,0 +1,1281 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# IJB-C Check MS Celeb MIDS\n",
+ "\n",
+ "- read in MS Celeb Master file\n",
+ "- read in MS Celeb clean file (currently disabled: its path is commented out in the paths cell)\n",
+ "- read in IJB-C name list"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 163,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "from os.path import join\n",
+ "import math\n",
+ "from glob import glob\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "\n",
+ "import sys\n",
+ "sys.path.append('/work/megapixels_dev/megapixels/')\n",
+ "from app.settings import app_cfg as cfg\n",
+ "from app.utils import identity_utils"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = '\"Arnold Rüütel\"@ca'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "b = a.split('\"')[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9545454545454546"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fp_msceleb_full = '/data_store/datasets/people/msceleb/dataset/Top1M_MidList.Name.csv'\n",
+ "#fp_msceleb_clean = '/data_store/datasets/people/msceleb/dataset/MS-Celeb-1M_clean_list.txt'\n",
+ "fp_ijbc = '/data_store/datasets/people/ijb_c/downloads/ijbc_subject_names.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_msceleb_full = pd.read_csv(fp_msceleb_full, sep=',', names=[\"kg_id\", 'name_msceleb'], quotechar = ',', skipinitialspace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "records_msceleb = df_msceleb_full.to_dict('records')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df_msceleb_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_ijbc = pd.read_csv(fp_ijbc)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "records_ijbc = df_ijbc.to_dict('records')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 166,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df_ijbc.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pre-compute msceleb data\n",
+ "# Raw names look like '\"Caio Henrique Siqueira Sanchez\"@en': a double-quoted\n",
+ "# name followed by '@' and a language tag.\n",
+ "for msceleb_item in records_msceleb:\n",
+ "    raw_name = msceleb_item['name_msceleb']\n",
+ "    if not isinstance(raw_name, str):\n",
+ "        # e.g. NaN when the CSV field was empty; avoid AttributeError on .split\n",
+ "        raw_name = ''\n",
+ "    splits = raw_name.split('\"')\n",
+ "    # splits[1] is the text between the first pair of quotes; fall back to the\n",
+ "    # raw value instead of raising IndexError when the name has no quotes\n",
+ "    msceleb_name = splits[1] if len(splits) > 1 else raw_name\n",
+ "    # splits[2] is whatever follows the closing quote (e.g. '@en'); drop the\n",
+ "    # first character and use an empty tag when there is nothing after the quote\n",
+ "    lang = splits[2][1:] if len(splits) > 2 else ''\n",
+ "    # each record dict is updated in place, so no copy of the list is needed\n",
+ "    msceleb_item['name_clean'] = msceleb_name\n",
+ "    msceleb_item['lang'] = lang\n",
+ "    # NOTE(review): sanitize_name is an opaque project helper; both the string\n",
+ "    # and the list form are stored because later cells read 'name_stripped_arr'\n",
+ "    msceleb_item['name_stripped_str'] = identity_utils.sanitize_name(msceleb_name, as_str=True)\n",
+ "    msceleb_item['name_stripped_arr'] = identity_utils.sanitize_name(msceleb_name, as_str=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 160,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'kg_id': 'm.01008lp2',\n",
+ " 'name_msceleb': '\"Caio Henrique Siqueira Sanchez\"@en',\n",
+ " 'name_clean': 'Caio Henrique Siqueira Sanchez',\n",
+ " 'name_stripped': ['caio', 'henrique', 'siqueira', 'sanchez'],\n",
+ " 'name_stripped_str': 'caio henrique siqueira sanchez',\n",
+ " 'name_stripped_arr': ['caio', 'henrique', 'siqueira', 'sanchez'],\n",
+ " 'lang': 'en'}"
+ ]
+ },
+ "execution_count": 160,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "records_msceleb[10]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 165,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1fce6cb2b3eb4cf294a794a27209b312",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=3531), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Match every IJB-C subject name against the pre-computed MS-Celeb records.\n",
+ "# found_names / unfound_names collect one result dict per distinct IJB-C name.\n",
+ "found_names = []\n",
+ "unfound_names = []\n",
+ "# names already processed; the result lists hold dicts, so a plain\n",
+ "# `name in found_names` test can never be true and cannot be used to de-duplicate\n",
+ "seen_names = set()\n",
+ "# a fuzzy match is accepted only when names_match scores above this value\n",
+ "score_threshold = 0.9\n",
+ "\n",
+ "for ijbc_item in tqdm(records_ijbc):\n",
+ "    ijbc_name = ijbc_item.get('SUBJECT_NAME')\n",
+ "\n",
+ "    # skip rows without a usable name: None/NaN would fail on .lower() and an\n",
+ "    # empty string is a substring of every name, so it would match the first record\n",
+ "    if not isinstance(ijbc_name, str) or not ijbc_name:\n",
+ "        continue\n",
+ "\n",
+ "    # ensure each name is only processed once\n",
+ "    if ijbc_name in seen_names:\n",
+ "        continue\n",
+ "    seen_names.add(ijbc_name)\n",
+ "\n",
+ "    item_match = None\n",
+ "\n",
+ "    # first look through all msceleb records for a simple case-insensitive\n",
+ "    # substring match (lower-case the IJB-C name once, not per record)\n",
+ "    ijbc_name_lower = ijbc_name.lower()\n",
+ "    for msceleb_item in records_msceleb:\n",
+ "        if ijbc_name_lower in msceleb_item['name_clean'].lower():\n",
+ "            item_match = msceleb_item\n",
+ "            break\n",
+ "\n",
+ "    # if not, do more aggressive/cpu intensive matching\n",
+ "    if item_match is None:\n",
+ "        ijbc_name_stripped = identity_utils.sanitize_name(ijbc_name, as_str=False)\n",
+ "        for msceleb_item in records_msceleb:\n",
+ "            msceleb_name_stripped = msceleb_item['name_stripped_arr']\n",
+ "            score = identity_utils.names_match(ijbc_name_stripped, msceleb_name_stripped, as_float=True, name_a_pre=True, name_b_pre=True)\n",
+ "            if score > score_threshold:\n",
+ "                item_match = msceleb_item\n",
+ "                break\n",
+ "\n",
+ "    # default match obj (all msceleb fields empty)\n",
+ "    match_obj = {'name_ijb_c': ijbc_name, 'kg_id': '', 'name_msceleb': '', 'lang_msceleb': ''}\n",
+ "\n",
+ "    if item_match is not None:\n",
+ "        # append matched info from msceleb\n",
+ "        match_obj['name_msceleb'] = item_match['name_msceleb']  # keep orig msceleb name\n",
+ "        match_obj['lang_msceleb'] = item_match['lang']\n",
+ "        # rewrite the id prefix 'm.' as '/m/'; count=1 leaves the rest of the id untouched\n",
+ "        match_obj['kg_id'] = item_match['kg_id'].replace('m.', '/m/', 1)\n",
+ "        found_names.append(match_obj)\n",
+ "    else:\n",
+ "        # default obj\n",
+ "        unfound_names.append(match_obj)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 167,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3299 232\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(len(found_names), len(unfound_names))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 168,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pprint import pprint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 170,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[{'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Raffaele Bonanni',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'King Abdullah II',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'George HW Bush',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Edmund Stoiber',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'JK Rowling',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ashton B. Carter',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Linah K. Mohohlo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': \"Alhaji Yar'Adua\",\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Hesham Mohamed Qandil',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Stephen Harper',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Martin Dempsey',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Horst Teltschik',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Andri Piebalgu',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Donald Tusk',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Richard Nixon',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Zulu Araujo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Eliana Calmon',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Edu Guedes',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Milene Uehara',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Celso Zucatelli',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jose Roberto Arruda',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Igor Slyunyayev',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Yevhenia Tymoshenko',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ernest Bower',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Seo Joo-hyun',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Doreen Lorenzo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Patrick Leahy',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Maria Soledad Alvear Valenzuela',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mario Kreutzberger',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ken Salazar',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Tim Allen',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mark A. Welsh III',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alessandro Molon',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Omobola Johnson',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jon Hamm',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Tammy Baldwin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Rajiv Shah',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Tedros Adhanom Ghebreyesus',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jerry Garcia',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Arlette Chabot',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Pedro Aznar',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Prakash Raj',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alex Jones',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gloria Álvarez',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Lakshmi Rai',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Waseem Abbas',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alicia Castro',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Aminata Traoré',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Paola Taverna',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Sanjay Kapoor',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '', 'lang_msceleb': '', 'name_ijb_c': 'AJ Lee', 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Andressa Urach',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Baby Margaretha',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Bob Garcia',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Dalila Nesci',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Dirk Müller',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mahesh Babu',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Maram al-Masri',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Minh Tran Huy',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Andressa Soares',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Barbara Lezzi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Eesha Koppikhar',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gabriel Jesus',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gayle San',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Kaajal Oza Vaidya',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Marcelo Freixo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Óscar Santos',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Sam Smith',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Naomi Klein',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Aditi Arya',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alberto Garzón',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Anna Ráckevei',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Eleonora Menicucci',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Helvy Tiana Rosa',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Houda-Imane Faraoun',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Karen Paola',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Laura Bottici',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Laura Castelli',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mihai Voicu',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Patti Smith',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Puan Maharani',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Savitha Sastry',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Sharon la Hechicera',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Bahram Moshiri',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Luz Salgado',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Nouria Benghabrit-Remaoun',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Sant Asaram Ji Bapu',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Selin Sayek Böke',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Angelina Love',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Anna Aaron',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Anna Kendrick',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Carla Ruocco',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Deepak Chopra',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Giulia Grillo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Maine Mendoza',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'MC Pedrinho',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mònica Oltra',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Rahul Gandhi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Saad Lamjarred',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Song So-hee',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Sơn Tùng M-TP',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Susi Pudjiastuti',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Fabiana Dadone',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Giulia Di Vita',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Juan Carlos De Martin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Luigi Di Maio',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Money Boy',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Retno Marsudi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alejandro Valverde',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alek Skarlatos',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alexa Clay',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Alice Zeniter',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Aminta Granera',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Andrés Palomino',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Anita Anand',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ann Harding',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Anne Bouverot',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Antoine Compagnon',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Antoine Westermann',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Axel Kahn',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ayman Odeh',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Bastien Vivès',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Betty Cantrell',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Blake Griffin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Blossom Chukwujekwu',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Camilla Toulmin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Carlos Reichenbach',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Cristina Pedroche',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Cristóbal Cobo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Céline Curiol',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Daniel Bilalian',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Daniel R. Russel',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'David Kobia',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Dominique Manotti',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Dragan Bender',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Edgar Morin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Eka Zguladze',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Erdős Virág',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ernestina Naadu Mills',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Fiona Wood',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Franck Pavloff',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Franck Thilliez',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Frank Fabra',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Fred Swaniker',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gabe Zichermann',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gherardo Colombo',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gilles Verdez',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Gizele Thakral',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Grady Jarrett',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Helen Arney',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Hessa Al Jaber',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Hiromi Uehara',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Houlin Zhao',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Hyvin Jepkemoi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Inguna Sudraba',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Iványi Gábor',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jackie Kay',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jamshyd Godrej',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jay Vinchi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jayati Ghosh',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jeffrey Tucker',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Jens Lehmann',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Josh Fox',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Josh Smith',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'João Sayad',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Juan Carlos Monedero',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Kalyan Varma',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Kate Brown',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Kate Clinton',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Kawlo Iyun Pacidal',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Kelly Benoit-Bird',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Khem Veasna',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Laila Al-Arian',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Leila Chudori',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Lhadon Tethong',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Lil Kesh',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Maguy Bou Ghosn',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Makase Nyaphisi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Malek Chebel',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mandana Karimi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Manuela Carmena',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Marc Laménie',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Marina Ruy Barbosa',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Marion Montaigne',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Marisa Matias',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mark Steyn',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mauricio Rodas',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mawra Hocane',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Melissa Gira Grant',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mhairi Black',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Michel Bauwens',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Michel Bussi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Morgan Marquis-Boire',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Mrinal Kulkarni',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Naomi Shelton',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Natalio Botana',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Nguyễn Thị Kim Ngân',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Nina Tandon',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Nina Turner',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Nyle DiMarco',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Okyeame Kwame',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Pamela Samuelson',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Papis Loveday',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Patrick Pelloux',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Piya Sorcar',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Rajkummar Rao',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Rami Ranger',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Raul Krauthausen',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Richard Nguema',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Robert Lefkowitz',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ronnie Ash',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ruby Yadav',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ryan Crocker',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Ryu Jun-yeol',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Samiksha Bhatnagar',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Shantell Martin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Shiho Yano',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Shirin Sharmin Chaudhury',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Spencer Stone',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Takaaki Kajita',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Tavis Smiley',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Tope Folarin',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Virginia Raggi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Walidah Imarisha',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Wes Schweitzer',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '', 'lang_msceleb': '', 'name_ijb_c': 'Win Tin', 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Wutt Hmone Shwe Yi',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Yassine Brahim',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Yemi Alade',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Young Guru',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Zakhar May',\n",
+ " 'name_msceleb': ''},\n",
+ " {'kg_id': '',\n",
+ " 'lang_msceleb': '',\n",
+ " 'name_ijb_c': 'Zoë Keating',\n",
+ " 'name_msceleb': ''}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "pprint(unfound_names)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Save CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 171,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# unfound_names is a list of per-name record dicts (see the pprint output);\n",
+ "# found_names is presumably built the same way -- TODO confirm.\n",
+ "# The DataFrame constructor accepts a list of records directly; from_dict is\n",
+ "# documented for dict input only.\n",
+ "df_ijbc_found = pd.DataFrame(found_names)\n",
+ "df_ijbc_unfound = pd.DataFrame(unfound_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 172,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Destination CSV paths: first the matched names, then the unmatched ones.\n",
+ "fp_found, fp_unfound = (\n",
+ "    '/data_store/datasets/people/ijb_c/processed/ijb_c_msceleb_found.csv',\n",
+ "    '/data_store/datasets/people/ijb_c/processed/ijb_c_msceleb_unfound.csv',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 173,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write both tables without the pandas index column. The output directory is\n",
+ "# created first so that a missing 'processed' folder does not make to_csv fail.\n",
+ "for df_out, fp_out in ((df_ijbc_found, fp_found), (df_ijbc_unfound, fp_unfound)):\n",
+ "    Path(fp_out).parent.mkdir(parents=True, exist_ok=True)\n",
+ "    df_out.to_csv(fp_out, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}