diff options
| author | adamhrv <adam@ahprojects.com> | 2019-01-28 18:11:36 +0100 |
|---|---|---|
| committer | adamhrv <adam@ahprojects.com> | 2019-01-28 18:11:36 +0100 |
| commit | dd2c36288aa1e8af14588f9258f6785879b8638c (patch) | |
| tree | 543564ff7cc9b83ae1ecbc5b0d89bca9a6c17742 /megapixels/notebooks/datasets/imdb_wiki/imdb_wiki_kg.ipynb | |
| parent | b0b06be0defe97ef19cf4d0f3328db40d299e110 (diff) | |
add utils for analyzing identities
Diffstat (limited to 'megapixels/notebooks/datasets/imdb_wiki/imdb_wiki_kg.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/imdb_wiki/imdb_wiki_kg.ipynb | 468 |
1 files changed, 0 insertions, 468 deletions
diff --git a/megapixels/notebooks/datasets/imdb_wiki/imdb_wiki_kg.ipynb b/megapixels/notebooks/datasets/imdb_wiki/imdb_wiki_kg.ipynb deleted file mode 100644 index b9a77fda..00000000 --- a/megapixels/notebooks/datasets/imdb_wiki/imdb_wiki_kg.ipynb +++ /dev/null @@ -1,468 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# IMDB-WIKI Knowledge Graph" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import os.path as osp\n", - "from os.path import join\n", - "from glob import glob\n", - "import random\n", - "import math\n", - "from datetime import datetime\n", - "import requests\n", - "import json\n", - "import urllib\n", - "\n", - "import cv2 as cv\n", - "import pandas as pd\n", - "from scipy.io import loadmat\n", - "import numpy as np\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from tqdm import tqdm_notebook as tqdm\n", - "%reload_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Metadata" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "fp_meta = '/data_store_hdd/datasets/people/imdb_wiki/metadata/imdb_wiki.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "df_meta = pd.read_csv(fp_meta).set_index('index')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>celeb_id</th>\n", - " <th>dob</th>\n", - " <th>filepath</th>\n", - " <th>gender</th>\n", - " <th>name</th>\n", - " <th>x1</th>\n", - " <th>x2</th>\n", - " <th>y1</th>\n", - " <th>y2</th>\n", - " <th>year_photo</th>\n", - " </tr>\n", - " <tr>\n", - " <th>index</th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " <th></th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>6488</td>\n", - " <td>1900-5-11</td>\n", - " <td>01/nm0000001_rm124825600_1899-5-10_1968.jpg</td>\n", - " <td>m</td>\n", - " <td>Fred Astaire</td>\n", - " <td>1072.926000</td>\n", - " <td>1214.784000</td>\n", - " <td>161.838000</td>\n", - " <td>303.696000</td>\n", - " <td>1968</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>6488</td>\n", - " <td>1900-5-11</td>\n", - " <td>01/nm0000001_rm3343756032_1899-5-10_1970.jpg</td>\n", - " <td>m</td>\n", - " <td>Fred Astaire</td>\n", - " <td>477.184000</td>\n", - " <td>622.592000</td>\n", - " <td>100.352000</td>\n", - " <td>245.760000</td>\n", - " <td>1970</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>6488</td>\n", - " <td>1900-5-11</td>\n", - " <td>01/nm0000001_rm577153792_1899-5-10_1968.jpg</td>\n", - " <td>m</td>\n", - " <td>Fred Astaire</td>\n", - " <td>114.969643</td>\n", - " <td>451.686572</td>\n", - " <td>114.969643</td>\n", - " <td>451.686572</td>\n", - " <td>1968</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>6488</td>\n", - " <td>1900-5-11</td>\n", - " <td>01/nm0000001_rm946909184_1899-5-10_1968.jpg</td>\n", - " <td>m</td>\n", - " <td>Fred Astaire</td>\n", - " <td>622.885506</td>\n", - " <td>844.339008</td>\n", - " <td>424.217504</td>\n", - " <td>645.671006</td>\n", - " <td>1968</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>6488</td>\n", - " <td>1900-5-11</td>\n", - " <td>01/nm0000001_rm980463616_1899-5-10_1968.jpg</td>\n", - " <td>m</td>\n", - " <td>Fred Astaire</td>\n", - " <td>1013.859002</td>\n", - " <td>1201.586128</td>\n", - " <td>233.882042</td>\n", - " <td>421.609168</td>\n", - " <td>1968</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " celeb_id dob filepath \\\n", - "index \n", - "0 6488 1900-5-11 01/nm0000001_rm124825600_1899-5-10_1968.jpg \n", - "1 6488 1900-5-11 01/nm0000001_rm3343756032_1899-5-10_1970.jpg \n", - "2 6488 1900-5-11 01/nm0000001_rm577153792_1899-5-10_1968.jpg \n", - "3 6488 1900-5-11 01/nm0000001_rm946909184_1899-5-10_1968.jpg \n", - "4 6488 1900-5-11 01/nm0000001_rm980463616_1899-5-10_1968.jpg \n", - "\n", - " gender name x1 x2 y1 y2 \\\n", - "index \n", - "0 m Fred Astaire 1072.926000 1214.784000 161.838000 303.696000 \n", - "1 m Fred Astaire 477.184000 622.592000 100.352000 245.760000 \n", - "2 m Fred Astaire 114.969643 451.686572 114.969643 451.686572 \n", - "3 m Fred Astaire 622.885506 844.339008 424.217504 645.671006 \n", - "4 m Fred Astaire 1013.859002 1201.586128 233.882042 421.609168 \n", - "\n", - " year_photo \n", - "index \n", - "0 1968 \n", - "1 1970 \n", - "2 1968 \n", - "3 1968 \n", - "4 1968 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_meta.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ids" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "api_key = open('/work/megapixels_dev/3rdparty/knowledge-graph-api/.api_key').read()\n", - "\n", - "def get_knowledge(q, api_key):\n", - " service_url = 'https://kgsearch.googleapis.com/v1/entities:search'\n", - " params = {\n", - " 'query': q,\n", - " 'limit': 5,\n", - " 'indent': True,\n", - " 'key': api_key,\n", - " }\n", - " url = service_url + '?' + urllib.parse.urlencode(params) # TODO: use requests\n", - " response = json.loads(urllib.request.urlopen(url).read())\n", - " response = response.get('itemListElement', [])\n", - " if len(response) > 0:\n", - " result = response[0].get('result', [])\n", - " result['score'] = response[0]['resultScore']\n", - " return result\n", - " else:\n", - " return []" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "General Secretary of the Communist Party of China\n", - "Xi Jinping\n" - ] - }, - { - "ename": "KeyError", - "evalue": "'url'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m--------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-106-654588fe3a11>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'description'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'url'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'score'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'url'" - ] - } - ], - "source": [ - "# test\n", - "q = 'Xi Jinping'\n", - "r = get_knowledge(q, api_key)\n", - "print(r['description'])\n", - "print(r['name'])\n", - "print(r['url'])\n", - "print(r['score'])" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [], - "source": [ - "from pprint import pprint" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "kg:/m/06ff60\n" - ] - } - ], - "source": [ - "print(r['@id'])" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'@id': 'kg:/g/11f4ksbzcm',\n", - " '@type': ['Thing', 'Event'],\n", - " 'detailedDescription': {'articleBody': 'On February 14, 2018, a gunman opened '\n", - " 'fire at Marjory Stoneman Douglas High '\n", - " 'School in Parkland, Florida, killing '\n", - " 'seventeen students and staff members '\n", - " 'and injuring seventeen others. ',\n", - " 'license': 'https://en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License',\n", - " 'url': 'https://en.wikipedia.org/wiki/Stoneman_Douglas_High_School_shooting'},\n", - " 'image': {'contentUrl': 'http://t1.gstatic.com/images?q=tbn:ANd9GcQmY7VqmGt4zEJU8Rc4EwPWroYd-L0QQ5wkZfiFO-WRqNBC-FPN',\n", - " 'url': 'https://en.wikipedia.org/wiki/Stoneman_Douglas_High_School_shooting'},\n", - " 'name': 'Stoneman Douglas High School shooting',\n", - " 'score': 60.411652}\n" - ] - } - ], - "source": [ - "pprint(r)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "dir_msceleb = '/data_store_hdd/datasets/people/msceleb/media/original/'" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "kgs_msceleb = os.listdir(dir_msceleb)\n", - "kgs_msceleb = ['/' + x.replace('.','/') for x in kgs_msceleb]" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "'/m/06ff60' in kgs_msceleb" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [], - "source": [ - "def get_kg_by_id(kg_id, api_key):\n", - " service_url = 'https://kgsearch.googleapis.com/v1/entities:search'\n", - " params = {\n", - " 'ids': kg_id,\n", - " 'limit': 1,\n", - " 'indent': True,\n", - " 'key': api_key,\n", - " }\n", - " url = service_url + '?' + urllib.parse.urlencode(params) # TODO: use requests\n", - " try:\n", - " response = json.loads(urllib.request.urlopen(url).read())\n", - " response = response.get('itemListElement', [])\n", - " result = response[0].get('result', [])\n", - " result['score'] = response[0]['resultScore']\n", - " return result\n", - " except Exception as e:\n", - " return []" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [], - "source": [ - "a = get_kg_by_id('/m/0100n5bs', api_key)" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:megapixels]", - "language": "python", - "name": "conda-env-megapixels-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} |
