{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# IMDB-WIKI Knowledge Graph" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [], "source": [ "import os\n", "import os.path as osp\n", "from os.path import join\n", "from glob import glob\n", "import random\n", "import math\n", "from datetime import datetime\n", "import requests\n", "import json\n", "import urllib\n", "\n", "import cv2 as cv\n", "import pandas as pd\n", "from scipy.io import loadmat\n", "import numpy as np\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "from tqdm import tqdm_notebook as tqdm\n", "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Metadata" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "fp_meta = '/data_store_hdd/datasets/people/imdb_wiki/metadata/imdb_wiki.csv'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "df_meta = pd.read_csv(fp_meta).set_index('index')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
celeb_iddobfilepathgendernamex1x2y1y2year_photo
index
064881900-5-1101/nm0000001_rm124825600_1899-5-10_1968.jpgmFred Astaire1072.9260001214.784000161.838000303.6960001968
164881900-5-1101/nm0000001_rm3343756032_1899-5-10_1970.jpgmFred Astaire477.184000622.592000100.352000245.7600001970
264881900-5-1101/nm0000001_rm577153792_1899-5-10_1968.jpgmFred Astaire114.969643451.686572114.969643451.6865721968
364881900-5-1101/nm0000001_rm946909184_1899-5-10_1968.jpgmFred Astaire622.885506844.339008424.217504645.6710061968
464881900-5-1101/nm0000001_rm980463616_1899-5-10_1968.jpgmFred Astaire1013.8590021201.586128233.882042421.6091681968
\n", "
" ], "text/plain": [ " celeb_id dob filepath \\\n", "index \n", "0 6488 1900-5-11 01/nm0000001_rm124825600_1899-5-10_1968.jpg \n", "1 6488 1900-5-11 01/nm0000001_rm3343756032_1899-5-10_1970.jpg \n", "2 6488 1900-5-11 01/nm0000001_rm577153792_1899-5-10_1968.jpg \n", "3 6488 1900-5-11 01/nm0000001_rm946909184_1899-5-10_1968.jpg \n", "4 6488 1900-5-11 01/nm0000001_rm980463616_1899-5-10_1968.jpg \n", "\n", " gender name x1 x2 y1 y2 \\\n", "index \n", "0 m Fred Astaire 1072.926000 1214.784000 161.838000 303.696000 \n", "1 m Fred Astaire 477.184000 622.592000 100.352000 245.760000 \n", "2 m Fred Astaire 114.969643 451.686572 114.969643 451.686572 \n", "3 m Fred Astaire 622.885506 844.339008 424.217504 645.671006 \n", "4 m Fred Astaire 1013.859002 1201.586128 233.882042 421.609168 \n", "\n", " year_photo \n", "index \n", "0 1968 \n", "1 1970 \n", "2 1968 \n", "3 1968 \n", "4 1968 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_meta.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ids" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "api_key = open('/work/megapixels_dev/3rdparty/knowledge-graph-api/.api_key').read()\n", "\n", "def get_knowledge(q, api_key):\n", " service_url = 'https://kgsearch.googleapis.com/v1/entities:search'\n", " params = {\n", " 'query': q,\n", " 'limit': 5,\n", " 'indent': True,\n", " 'key': api_key,\n", " }\n", " url = service_url + '?' + urllib.parse.urlencode(params) # TODO: use requests\n", " response = json.loads(urllib.request.urlopen(url).read())\n", " response = response.get('itemListElement', [])\n", " if len(response) > 0:\n", " result = response[0].get('result', [])\n", " result['score'] = response[0]['resultScore']\n", " return result\n", " else:\n", " return []" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "General Secretary of the Communist Party of China\n", "Xi Jinping\n" ] }, { "ename": "KeyError", "evalue": "'url'", "output_type": "error", "traceback": [ "\u001b[0;31m--------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'description'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'url'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'score'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: 'url'" ] } ], "source": [ "# test\n", "q = 'Xi Jinping'\n", "r = get_knowledge(q, api_key)\n", "print(r['description'])\n", "print(r['name'])\n", "print(r['url'])\n", "print(r['score'])" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "from pprint import pprint" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "kg:/m/06ff60\n" ] } ], "source": [ "print(r['@id'])" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'@id': 'kg:/g/11f4ksbzcm',\n", " '@type': ['Thing', 'Event'],\n", " 'detailedDescription': {'articleBody': 'On February 14, 2018, a gunman opened '\n", " 'fire at Marjory Stoneman Douglas High '\n", " 'School in Parkland, Florida, killing '\n", " 'seventeen students and staff members '\n", " 'and injuring seventeen others. ',\n", " 'license': 'https://en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License',\n", " 'url': 'https://en.wikipedia.org/wiki/Stoneman_Douglas_High_School_shooting'},\n", " 'image': {'contentUrl': 'http://t1.gstatic.com/images?q=tbn:ANd9GcQmY7VqmGt4zEJU8Rc4EwPWroYd-L0QQ5wkZfiFO-WRqNBC-FPN',\n", " 'url': 'https://en.wikipedia.org/wiki/Stoneman_Douglas_High_School_shooting'},\n", " 'name': 'Stoneman Douglas High School shooting',\n", " 'score': 60.411652}\n" ] } ], "source": [ "pprint(r)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "dir_msceleb = '/data_store_hdd/datasets/people/msceleb/media/original/'" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "kgs_msceleb = os.listdir(dir_msceleb)\n", "kgs_msceleb = ['/' + x.replace('.','/') for x in kgs_msceleb]" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'/m/06ff60' in kgs_msceleb" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [], "source": [ "def get_kg_by_id(kg_id, api_key):\n", " service_url = 'https://kgsearch.googleapis.com/v1/entities:search'\n", " params = {\n", " 'ids': kg_id,\n", " 'limit': 1,\n", " 'indent': True,\n", " 'key': api_key,\n", " }\n", " url = service_url + '?' + urllib.parse.urlencode(params) # TODO: use requests\n", " try:\n", " response = json.loads(urllib.request.urlopen(url).read())\n", " response = response.get('itemListElement', [])\n", " result = response[0].get('result', [])\n", " result['score'] = response[0]['resultScore']\n", " return result\n", " except Exception as e:\n", " return []" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [], "source": [ "a = get_kg_by_id('/m/0100n5bs', api_key)" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:megapixels]", "language": "python", "name": "conda-env-megapixels-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }