summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb')
-rw-r--r--megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb352
1 files changed, 352 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb b/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb
new file mode 100644
index 00000000..6450e0d9
--- /dev/null
+++ b/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb
@@ -0,0 +1,352 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Test Flickr API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import os\n",
+ "from os.path import join\n",
+ "from glob import glob\n",
+ "from pathlib import Path\n",
+ "import requests\n",
+ "import json\n",
+ "from pprint import pprint\n",
+ "from multiprocessing.pool import ThreadPool\n",
+ "import threading\n",
+ "import urllib.request\n",
+ "import hashlib\n",
+ "\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from random import randint\n",
+ "\n",
+ "import sys\n",
+ "sys.path.append('/work/megapixels_dev/megapixels')\n",
+ "from app.utils import api_utils, identity_utils\n",
+ "\n",
+ "from app.settings import app_cfg\n",
+ "from app.settings import types\n",
+ "\n",
+ "import flickr_api # pip install flickr_api\n",
+ "from flickr_api.flickrerrors import FlickrAPIError"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create CSV for API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Shared dataset root; the CSV outputs are written under its research/ folder\n",
+ "dir_pipa = '/data_store/datasets/people/pipa'\n",
+ "dir_research = join(dir_pipa, 'research')\n",
+ "\n",
+ "fp_in = join(dir_pipa, 'downloads', 'annotations', 'index.txt')\n",
+ "fp_out = '/data_store_hdd/datasets/people/pipa/research/flickr_metadata/'\n",
+ "fp_out_photo_ids = join(dir_research, 'pipa_photo_ids.csv')\n",
+ "fp_out_photoset_ids = join(dir_research, 'pipa_photoset_ids.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# index.txt has 8 space-separated columns (see the format note in the next cell).\n",
+ "# The previous 7-name list omitted 'width', so every label after 'ymin' was shifted.\n",
+ "names = ['photoset_id', 'photo_id', 'xmin', 'ymin', 'width', 'height', 'identity_id', 'subset_id']\n",
+ "\n",
+ "# Only the two ID columns are needed to build the Flickr queries, so load just those\n",
+ "df_records = pd.read_csv(fp_in, delimiter=' ', names=names, index_col=False,\n",
+ "                         usecols=['photoset_id', 'photo_id'])\n",
+ "records = df_records.to_dict('records')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "```\n",
+ "<photoset_id> <photo_id> <xmin> <ymin> <width> <height> <identity_id> <subset_id>\n",
+ "The subset IDs are 0 for leftover, 1 for train, 2 for validation, 3 for test\n",
+ "\n",
+ "The photos are located in the images directory, in the form <photoset_id>_<photo_id>.jpg\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Each index.txt row carries one box + identity_id, so a photo can appear on\n",
+ "# several rows; query every photo only once (first-seen order is kept)\n",
+ "unique_photo_ids = df_records['photo_id'].drop_duplicates().tolist()\n",
+ "\n",
+ "# One CSV row per query: the JSON file to write and the ID to look up\n",
+ "photo_ids = [{'filepath': '{}.json'.format(pid), 'query': pid} for pid in unique_photo_ids]\n",
+ "df_photo_ids = pd.DataFrame(photo_ids, columns=['filepath', 'query'])\n",
+ "df_photo_ids.to_csv(fp_out_photo_ids, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Many rows share a photoset, so de-duplicate before writing the query list\n",
+ "# (first-seen order is kept)\n",
+ "unique_photoset_ids = df_records['photoset_id'].drop_duplicates().tolist()\n",
+ "\n",
+ "# One CSV row per query: the JSON file to write and the ID to look up\n",
+ "photoset_ids = [{'filepath': '{}.json'.format(psid), 'query': psid} for psid in unique_photoset_ids]\n",
+ "df_photoset_ids = pd.DataFrame(photoset_ids, columns=['filepath', 'query'])\n",
+ "df_photoset_ids.to_csv(fp_out_photoset_ids, index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Test Flickr API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Credentials are read from the environment so they are never committed with the\n",
+ "# notebook. Export FLICKR_API_KEY and FLICKR_API_SECRET before starting the kernel;\n",
+ "# a missing variable raises KeyError here rather than failing later inside the API.\n",
+ "FLICKR_API_KEY = os.environ['FLICKR_API_KEY']\n",
+ "FLICKR_API_SECRET = os.environ['FLICKR_API_SECRET']\n",
+ "\n",
+ "# init Flickr API\n",
+ "flickr_api.set_keys(api_key=FLICKR_API_KEY, api_secret=FLICKR_API_SECRET)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1808093328\n",
+ "72157607317745768\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Use the first annotation record as the sample photo for the API test\n",
+ "first_record = records[0]\n",
+ "photo_id = first_record['photo_id']\n",
+ "print(photo_id, first_record['photoset_id'], sep='\\n')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "photo = flickr_api.Photo(id=photo_id)\n",
+ "info = photo.getInfo()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pprint import pprint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from urllib.parse import urlencode\n",
+ "\n",
+ "# Build the flickr.photos.getInfo REST URL. The API key is read from the\n",
+ "# FLICKR_API_KEY environment variable instead of being embedded in the notebook.\n",
+ "flickr_params = [\n",
+ "    ('method', 'flickr.photos.getInfo'),\n",
+ "    ('api_key', os.environ['FLICKR_API_KEY']),\n",
+ "    ('photo_id', '6796778203'),\n",
+ "    ('format', 'json'),\n",
+ "    ('nojsoncallback', '1'),\n",
+ "]\n",
+ "flickr_url = 'https://api.flickr.com/services/rest/?' + urlencode(flickr_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'http://some-address.com/api/This+is+a+test'"
+ ]
+ },
+ "execution_count": 125,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from requests.compat import urljoin, quote_plus\n",
+ "\n",
+ "# Scratch check: quote_plus escapes spaces as '+' before the term is joined onto\n",
+ "# a base URL. (The duplicate flickr_url assignment that embedded the API key was\n",
+ "# removed; flickr_url is already defined in the previous cell.)\n",
+ "url = \"http://some-address.com/api/\"\n",
+ "term = 'This is a test'\n",
+ "\n",
+ "urljoin(url, quote_plus(term))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NOTE(review): this cell held the unfinished statement `from urllib.request.u`, a\n",
+ "# SyntaxError on Run All. Completed as urlopen - confirm this was the intended name.\n",
+ "from urllib.request import urlopen"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'photo': {'id': '6796778203', 'secret': 'e2c0873305', 'server': '7033', 'farm': 8, 'dateuploaded': '1328033443', 'isfavorite': 0, 'license': '0', 'safety_level': '0', 'rotation': 0, 'owner': {'nsid': '62559061@N06', 'username': 'Joybot', 'realname': '', 'location': 'United Kingdom', 'iconserver': '4503', 'iconfarm': 5, 'path_alias': 'joybot'}, 'title': {'_content': 'Unfortunate hair thanks to sudden gust of wind and lack of hair straighteners'}, 'description': {'_content': 'I think this was before the time of widespread hair straighteners.\\nWhat a wonderful modern world we now live in.\\n\\nPolhawn Fort, Cornwall, UK.\\nFuji colour negative film, 100ISO, Kodak Box Brownie (620 format, cropped).'}, 'visibility': {'ispublic': 1, 'isfriend': 0, 'isfamily': 0}, 'dates': {'posted': '1328033443', 'taken': '2000-12-01 00:00:00', 'takengranularity': '4', 'takenunknown': 0, 'lastupdate': '1348236375'}, 'views': '438', 'editability': {'cancomment': 0, 'canaddmeta': 0}, 'publiceditability': {'cancomment': 1, 'canaddmeta': 0}, 'usage': {'candownload': 0, 'canblog': 0, 'canprint': 0, 'canshare': 0}, 'comments': {'_content': '0'}, 'notes': {'note': []}, 'people': {'haspeople': 0}, 'tags': {'tag': [{'id': '62513739-6796778203-201512', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '620', '_content': '620', 'machine_tag': 0}, {'id': '62513739-6796778203-19713', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'roll', '_content': 'roll', 'machine_tag': 0}, {'id': '62513739-6796778203-664', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'film', '_content': 'film', 'machine_tag': 0}, {'id': '62513739-6796778203-19853', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'brownie', '_content': 'brownie', 'machine_tag': 0}, {'id': '62513739-6796778203-4689', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'kodak', '_content': 'kodak', 'machine_tag': 0}, {'id': '62513739-6796778203-4845', 'author': '62559061@N06', 'authorname': 'Joybot', 
'raw': 'box', '_content': 'box', 'machine_tag': 0}, {'id': '62513739-6796778203-368625', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'box brownie', '_content': 'boxbrownie', 'machine_tag': 0}, {'id': '62513739-6796778203-545', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'colour', '_content': 'colour', 'machine_tag': 0}, {'id': '62513739-6796778203-544', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'color', '_content': 'color', 'machine_tag': 0}, {'id': '62513739-6796778203-1606', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'print', '_content': 'print', 'machine_tag': 0}, {'id': '62513739-6796778203-452117', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'c41', '_content': 'c41', 'machine_tag': 0}, {'id': '62513739-6796778203-867', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'girl', '_content': 'girl', 'machine_tag': 0}, {'id': '62513739-6796778203-4461', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'wind', '_content': 'wind', 'machine_tag': 0}, {'id': '62513739-6796778203-347', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'woman', '_content': 'woman', 'machine_tag': 0}, {'id': '62513739-6796778203-7363', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'young', '_content': 'young', 'machine_tag': 0}, {'id': '62513739-6796778203-1504', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'hair', '_content': 'hair', 'machine_tag': 0}, {'id': '62513739-6796778203-344832', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'blowy', '_content': 'blowy', 'machine_tag': 0}, {'id': '62513739-6796778203-158277', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'gust', '_content': 'gust', 'machine_tag': 0}, {'id': '62513739-6796778203-1804', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'outside', '_content': 'outside', 'machine_tag': 0}, {'id': '62513739-6796778203-1860', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'outdoors', 
'_content': 'outdoors', 'machine_tag': 0}, {'id': '62513739-6796778203-11117', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'diagonal', '_content': 'diagonal', 'machine_tag': 0}, {'id': '62513739-6796778203-3360', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'cornwall', '_content': 'cornwall', 'machine_tag': 0}, {'id': '62513739-6796778203-4757', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'united kingdom', '_content': 'unitedkingdom', 'machine_tag': 0}, {'id': '62513739-6796778203-110', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'uk', '_content': 'uk', 'machine_tag': 0}, {'id': '62513739-6796778203-279', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'england', '_content': 'england', 'machine_tag': 0}, {'id': '62513739-6796778203-8461', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'windy', '_content': 'windy', 'machine_tag': 0}, {'id': '62513739-6796778203-181776', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'blustery', '_content': 'blustery', 'machine_tag': 0}, {'id': '62513739-6796778203-608', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'english', '_content': 'english', 'machine_tag': 0}, {'id': '62513739-6796778203-936', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'weather', '_content': 'weather', 'machine_tag': 0}, {'id': '62513739-6796778203-93', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'camera', '_content': 'camera', 'machine_tag': 0}, {'id': '62513739-6796778203-1386', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'vintage', '_content': 'vintage', 'machine_tag': 0}, {'id': '62513739-6796778203-15685', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'frame', '_content': 'frame', 'machine_tag': 0}, {'id': '62513739-6796778203-12613', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'negative', '_content': 'negative', 'machine_tag': 0}, {'id': '62513739-6796778203-2703', 'author': '62559061@N06', 
'authorname': 'Joybot', 'raw': 'code', '_content': 'code', 'machine_tag': 0}, {'id': '62513739-6796778203-22593', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '100', '_content': '100', 'machine_tag': 0}, {'id': '62513739-6796778203-11561', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'number', '_content': 'number', 'machine_tag': 0}, {'id': '62513739-6796778203-22573', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'arrow', '_content': 'arrow', 'machine_tag': 0}, {'id': '62513739-6796778203-1412', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'medium format', '_content': 'mediumformat', 'machine_tag': 0}, {'id': '62513739-6796778203-4844', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'fort', '_content': 'fort', 'machine_tag': 0}, {'id': '62513739-6796778203-745203', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'polhawn', '_content': 'polhawn', 'machine_tag': 0}, {'id': '62513739-6796778203-1006210', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '英國', '_content': '英國', 'machine_tag': 0}, {'id': '62513739-6796778203-490988', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '英国', '_content': '英国', 'machine_tag': 0}, {'id': '62513739-6796778203-19177', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'angleterre', '_content': 'angleterre', 'machine_tag': 0}, {'id': '62513739-6796778203-4846', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'britain', '_content': 'britain', 'machine_tag': 0}, {'id': '62513739-6796778203-112', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'great britain', '_content': 'greatbritain', 'machine_tag': 0}, {'id': '62513739-6796778203-885', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'face', '_content': 'face', 'machine_tag': 0}, {'id': '62513739-6796778203-99242', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'visage', '_content': 'visage', 'machine_tag': 0}, {'id': '62513739-6796778203-278', 'author': 
'62559061@N06', 'authorname': 'Joybot', 'raw': 'portrait', '_content': 'portrait', 'machine_tag': 0}, {'id': '62513739-6796778203-2007', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'one', '_content': 'one', 'machine_tag': 0}, {'id': '62513739-6796778203-1928', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'person', '_content': 'person', 'machine_tag': 0}, {'id': '62513739-6796778203-14578', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'single', '_content': 'single', 'machine_tag': 0}, {'id': '62513739-6796778203-119393', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'individual', '_content': 'individual', 'machine_tag': 0}]}, 'location': {'latitude': '50.323429', 'longitude': '-4.219769', 'accuracy': '16', 'context': '0', 'locality': {'_content': 'Polhawn', 'place_id': 'ur8c.TRSULiH.w', 'woeid': '32216'}, 'county': {'_content': 'Cornwall and Isles of Scilly', 'place_id': '6NMU9K5QULzZZXxaag', 'woeid': '12602181'}, 'region': {'_content': 'England', 'place_id': '2eIY2QFTVr_DwWZNLg', 'woeid': '24554868'}, 'country': {'_content': 'United Kingdom', 'place_id': 'cnffEpdTUb5v258BBA', 'woeid': '23424975'}, 'place_id': 'ur8c.TRSULiH.w', 'woeid': '32216'}, 'geoperms': {'ispublic': 1, 'iscontact': 0, 'isfriend': 0, 'isfamily': 0}, 'urls': {'url': [{'type': 'photopage', '_content': 'https://www.flickr.com/photos/joybot/6796778203/'}]}, 'media': 'photo'}, 'stat': 'ok'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Fetch the getInfo response and parse its JSON body. The handle is named\n",
+ "# `response` so it does not overwrite the `url` string set in an earlier cell,\n",
+ "# and a timeout keeps a stalled connection from hanging the kernel.\n",
+ "with urllib.request.urlopen(flickr_url, timeout=30) as response:\n",
+ "    data = json.loads(response.read().decode('utf-8'))\n",
+ "print(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save the parsed response for inspection. The home directory is resolved at\n",
+ "# runtime instead of hard-coding one user's absolute path.\n",
+ "fp_test_json = join(os.path.expanduser('~'), 'Downloads', 'test.json')\n",
+ "with open(fp_test_json, 'w') as f_out:\n",
+ "    json.dump(data, f_out)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'canaddmeta': 0,\n",
+ " 'canblog': 0,\n",
+ " 'cancomment': 1,\n",
+ " 'candownload': 1,\n",
+ " 'canprint': 0,\n",
+ " 'canshare': 1,\n",
+ " 'comments': '0',\n",
+ " 'dateuploaded': '1193821883',\n",
+ " 'description': 'Urmas Paet with Tallinn skyline as backdrop.',\n",
+ " 'editability': {'canaddmeta': 0, 'cancomment': 0},\n",
+ " 'farm': 3,\n",
+ " 'id': '1808093328',\n",
+ " 'isfamily': 0,\n",
+ " 'isfavorite': 0,\n",
+ " 'isfriend': 0,\n",
+ " 'ispublic': 1,\n",
+ " 'lastupdate': '1291726681',\n",
+ " 'license': '4',\n",
+ " 'media': 'photo',\n",
+ " 'notes': [],\n",
+ " 'originalformat': 'jpg',\n",
+ " 'originalsecret': '84b2933747',\n",
+ " 'owner_id': '16941867@N06',\n",
+ " 'owner_username': 'Estonian Foreign Ministry',\n",
+ " 'people': {'haspeople': 0},\n",
+ " 'posted': '1193821883',\n",
+ " 'rotation': 0,\n",
+ " 'safety_level': '0',\n",
+ " 'secret': '285209b709',\n",
+ " 'server': '2364',\n",
+ " 'tag_ids': ['16896545-1808093328-4053146',\n",
+ " '16896545-1808093328-29495',\n",
+ " '16896545-1808093328-77084',\n",
+ " '16896545-1808093328-3363'],\n",
+ " 'tag_names': ['paet', 'foreign', 'minister', 'estonia'],\n",
+ " 'taken': '2007-10-18 16:01:14',\n",
+ " 'takengranularity': '0',\n",
+ " 'takenunknown': 0,\n",
+ " 'title': 'Estonian Foreign Minister Urmas Paet',\n",
+ " 'urls': {'url': [{'text': 'https://www.flickr.com/photos/estonian-foreign-ministry/1808093328/',\n",
+ " 'type': 'photopage'}]},\n",
+ " 'views': '1127'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "pprint(info)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}