summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/names_kg.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/names_kg.ipynb')
-rw-r--r--megapixels/notebooks/datasets/names_kg.ipynb243
1 files changed, 0 insertions, 243 deletions
diff --git a/megapixels/notebooks/datasets/names_kg.ipynb b/megapixels/notebooks/datasets/names_kg.ipynb
deleted file mode 100644
index ab4edc4b..00000000
--- a/megapixels/notebooks/datasets/names_kg.ipynb
+++ /dev/null
@@ -1,243 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Find Knowledge Graph Names"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "import os.path as osp\n",
- "from os.path import join\n",
- "from glob import glob\n",
- "import random\n",
- "import math\n",
- "import time\n",
- "from datetime import datetime\n",
- "\n",
- "import requests\n",
- "\n",
- "import json\n",
- "import urllib\n",
- "from multiprocessing.pool import ThreadPool\n",
- "import threading\n",
- "from urllib.request import urlopen\n",
- "import urllib.request\n",
- "\n",
- "\n",
- "import cv2 as cv\n",
- "import pandas as pd\n",
- "from scipy.io import loadmat\n",
- "import numpy as np\n",
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "from tqdm import tqdm_notebook as tqdm\n",
- "%reload_ext autoreload\n",
- "%autoreload 2\n",
- "import sys\n",
- "sys.path.append('/work/megapixels_dev/megapixels/')\n",
- "from app.utils import file_utils, im_utils"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Load Metadata"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "dir_msceleb = '/data_store_hdd/datasets/people/msceleb/media/original/'\n",
- "# Each entry name under dir_msceleb is turned into a Knowledge Graph id by\n",
- "# rewriting its first 'm.' to '/m/'.\n",
- "# sorted() keeps the id list (and everything derived from it) in a stable\n",
- "# order, because os.listdir() returns entries in arbitrary order.\n",
- "kg_ids_msceleb = [x.replace('m.', '/m/', 1) for x in sorted(os.listdir(dir_msceleb))]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Read the API key with a context manager so the file handle is closed, and\n",
- "# strip it: a trailing newline in the key file would otherwise be URL-encoded\n",
- "# into the 'key' parameter of every request.\n",
- "with open('/work/megapixels_dev/3rdparty/knowledge-graph-api/.api_key') as fp_key:\n",
- "    api_key = fp_key.read().strip()\n",
- "url_kg_api = 'https://kgsearch.googleapis.com/v1/entities:search'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_kg_from_kg_obj(obj):\n",
- "    \"\"\"Query the Knowledge Graph API for one entity id and flatten the answer.\n",
- "\n",
- "    Reads the module-level `api_key` and `url_kg_api`.\n",
- "\n",
- "    Args:\n",
- "        obj: dict with a 'kg_id' key holding the entity id to look up.\n",
- "\n",
- "    Returns:\n",
- "        dict that always contains 'kg_id', 'score', 'description', 'url' and\n",
- "        'accessed'. 'accessed' becomes True once the response body was parsed\n",
- "        as JSON. When the response lists at least one entity, 'name',\n",
- "        'image_url', 'description_extended', 'description_license' and\n",
- "        'description_url' are set as well (empty string when the API omitted\n",
- "        them). If the request or the parsing failed, 'error' holds the\n",
- "        exception text.\n",
- "    \"\"\"\n",
- "    # TODO detect 503 service unavailable; the error text would then read\n",
- "    # 'HTTP Error 503: Service Unavailable' (bad ids give 'HTTP Error 400: Bad Request')\n",
- "    kg_id = obj['kg_id']\n",
- "    params = {\n",
- "        'ids': kg_id,\n",
- "        'limit': 1,\n",
- "        'indent': True,\n",
- "        'key': api_key,\n",
- "    }\n",
- "    url = f'{url_kg_api}?{urllib.parse.urlencode(params)}'\n",
- "    result = {'kg_id': kg_id, 'score': 0.0, 'description': '', 'url':'', 'accessed': False} # default\n",
- "    try:\n",
- "        # context manager closes the HTTP response once the body is read\n",
- "        with urllib.request.urlopen(url) as http_response:\n",
- "            json_response = http_response.read()\n",
- "    except Exception as e:\n",
- "        result['error'] = str(e)\n",
- "        return result\n",
- "\n",
- "    try:\n",
- "        response = json.loads(json_response)\n",
- "        items = response.get('itemListElement', [])\n",
- "        result['accessed'] = True\n",
- "        if items:\n",
- "            item = items[0]\n",
- "            # Fall back to empty dicts so that a missing section produces empty\n",
- "            # strings instead of an AttributeError on a list/str default.\n",
- "            item_result = item.get('result') or {}\n",
- "            det_desc = item_result.get('detailedDescription') or {}\n",
- "            result_img = item_result.get('image') or {}\n",
- "            result['description'] = item_result.get('description', '')\n",
- "            result['description_extended'] = det_desc.get('articleBody', '')\n",
- "            result['description_license'] = det_desc.get('license', '')\n",
- "            result['description_url'] = det_desc.get('url', '')\n",
- "            # always present, so every matched entity has the same keys\n",
- "            result['image_url'] = result_img.get('contentUrl', '')\n",
- "            result['name'] = item_result.get('name', '')\n",
- "            result['score'] = item.get('resultScore', 0.0)\n",
- "            result['url'] = item_result.get('url', '')\n",
- "    except Exception as e:\n",
- "        result['error'] = str(e)\n",
- "    return result"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "# one {'kg_id': ...} record per id, the input shape get_kg_from_kg_obj reads\n",
- "unmapped_persons = [dict(kg_id=kg_id) for kg_id in kg_ids_msceleb]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "2d0733764379489aa82ed20f20edbb9b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "HBox(children=(IntProgress(value=0, max=93418), HTML(value='')))"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "f38b47614c5b4894b7e026b6a46a5057",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "HBox(children=(IntProgress(value=0, max=93418), HTML(value='')))"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "opt_threads = 10\n",
- "\n",
- "# define thread mapping function\n",
- "def pool_map_persons(obj):\n",
- "    \"\"\"Look up one person record and advance the shared progress bar.\n",
- "\n",
- "    Relies on the module-level `pbar` that is opened just before the pool runs.\n",
- "    \"\"\"\n",
- "    kg_obj = get_kg_from_kg_obj(obj)\n",
- "    pbar.update(1)  # tick after the lookup so the bar reflects finished work\n",
- "    return kg_obj\n",
- "\n",
- "mapped_persons = []\n",
- "\n",
- "# One progress bar and one pool, both released when the block exits.\n",
- "# pool.map() blocks until every item is done, so leaving the pool context\n",
- "# afterwards does not cut any work short.\n",
- "with tqdm(total=len(unmapped_persons)) as pbar, ThreadPool(opt_threads) as pool:\n",
- "    mapped_persons = pool.map(pool_map_persons, unmapped_persons)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "```\n",
- "{'@id': 'kg:/m/01008l96',\n",
- " 'name': 'Mohamed Guessous',\n",
- " '@type': ['Thing', 'Person'],\n",
- " 'description': 'Moroccan sociologist',\n",
- " 'image': {'contentUrl': 'http://t2.gstatic.com/images?q=tbn:ANd9GcTAHGBU-4ZzSqcMbDPnSHZA10u0L9Hnppdvt_AnFdQzOYnS0aHM',\n",
- " 'url': 'https://en.wikipedia.org/wiki/Mohamed_Guessous'},\n",
- " 'detailedDescription': {'articleBody': 'Mohamed Guessous was a Moroccan sociologist. He was also an active politician in the Socialist Union of Popular Forces.',\n",
- " 'url': 'https://en.wikipedia.org/wiki/Mohamed_Guessous',\n",
- " 'license': 'https://en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License'},\n",
- " 'score': 11.046742}\n",
- " ```"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python [conda env:megapixels]",
- "language": "python",
- "name": "conda-env-megapixels-py"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}