diff options
Diffstat (limited to 'megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb | 352 |
1 files changed, 352 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb b/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb new file mode 100644 index 00000000..6450e0d9 --- /dev/null +++ b/megapixels/notebooks/datasets/pipa/pipa_flickr_api.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Flickr API" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import os\n", + "from os.path import join\n", + "from glob import glob\n", + "from pathlib import Path\n", + "import requests\n", + "from requests.compat import urljoin, quote_plus\n", + "import json\n", + "from pprint import pprint\n", + "from multiprocessing.pool import ThreadPool\n", + "import threading\n", + "import urllib.request\n", + "import hashlib\n", + "\n", + "from tqdm import tqdm_notebook as tqdm\n", + "import pandas as pd\n", + "import numpy as np\n", + "from random import randint\n", + "\n", + "import sys\n", + "sys.path.append('/work/megapixels_dev/megapixels')\n", + "from app.utils import api_utils, identity_utils\n", + "\n", + "from app.settings import app_cfg\n", + "from app.settings import types\n", + "\n", + "import flickr_api # pip install flickr_api\n", + "from flickr_api.flickrerrors import FlickrAPIError" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create CSV for API" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "fp_in = '/data_store/datasets/people/pipa/downloads/annotations/index.txt'\n", + "fp_out = '/data_store_hdd/datasets/people/pipa/research/flickr_metadata/'\n", + "fp_out_photo_ids = '/data_store/datasets/people/pipa/research/pipa_photo_ids.csv'\n", + "fp_out_photoset_ids = '/data_store/datasets/people/pipa/research/pipa_photoset_ids.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + 
"# index.txt has 8 space-separated columns (see the format note below); only the two ID columns are kept\n", + "names = ['photoset_id', 'photo_id', 'xmin', 'ymin', 'width', 'height', 'identity_id', 'subset_id']\n", + "df_records = pd.read_csv(fp_in, delimiter=' ', names=names, usecols=['photoset_id', 'photo_id'], index_col=False)\n", + "records = df_records.to_dict('records')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "<photoset_id> <photo_id> <xmin> <ymin> <width> <height> <identity_id> <subset_id>\n", + "The subset IDs are 0 for leftover, 1 for train, 2 for validation, 3 for test\n", + "\n", + "The photos are located in the images directory, in the form <photoset_id>_<photo_id>.jpg\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "# One CSV row per index.txt record: output JSON filename plus the photo ID to query\n", + "photo_ids = []\n", + "for record in records:\n", + " fp = '{}.json'.format(record['photo_id'])\n", + " photo_ids.append({'filepath': fp, 'query': record['photo_id']}) \n", + "df_photo_ids = pd.DataFrame.from_dict(photo_ids)\n", + "df_photo_ids.to_csv(fp_out_photo_ids, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "# Same layout as the photo ID CSV, keyed on the photoset ID instead\n", + "photoset_ids = []\n", + "for record in records:\n", + " fp = '{}.json'.format(record['photoset_id'])\n", + " photoset_ids.append({'filepath': fp, 'query': record['photoset_id']}) \n", + "df_photoset_ids = pd.DataFrame.from_dict(photoset_ids)\n", + "df_photoset_ids.to_csv(fp_out_photoset_ids, index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Flickr API" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "FLICKR_API_KEY = os.environ['FLICKR_API_KEY']\n", + "FLICKR_API_SECRET = os.environ['FLICKR_API_SECRET']\n", + "\n", + "# init Flickr API (credentials come from the environment, never from the notebook)\n", + "flickr_api.set_keys(api_key=FLICKR_API_KEY, api_secret=FLICKR_API_SECRET)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1808093328\n", + "72157607317745768\n" + ] + } + ], + "source": [ + "photo_id = records[0]['photo_id']\n", + "print(photo_id)\n", + "print(records[0]['photoset_id'])" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "photo = flickr_api.Photo(id=photo_id)\n", + "info = photo.getInfo()" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "# Reuse FLICKR_API_KEY instead of embedding the key in the URL\n", + "flickr_url = 'https://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key={}&photo_id=6796778203&format=json&nojsoncallback=1'.format(FLICKR_API_KEY)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://some-address.com/api/This+is+a+test'" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = \"http://some-address.com/api/\"\n", + "term = 'This is a test'\n", + "\n", + "urljoin(url, quote_plus(term))\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'photo': {'id': '6796778203', 'secret': 'e2c0873305', 'server': '7033', 'farm': 8, 'dateuploaded': '1328033443', 'isfavorite': 0, 'license': '0', 'safety_level': '0', 
'rotation': 0, 'owner': {'nsid': '62559061@N06', 'username': 'Joybot', 'realname': '', 'location': 'United Kingdom', 'iconserver': '4503', 'iconfarm': 5, 'path_alias': 'joybot'}, 'title': {'_content': 'Unfortunate hair thanks to sudden gust of wind and lack of hair straighteners'}, 'description': {'_content': 'I think this was before the time of widespread hair straighteners.\\nWhat a wonderful modern world we now live in.\\n\\nPolhawn Fort, Cornwall, UK.\\nFuji colour negative film, 100ISO, Kodak Box Brownie (620 format, cropped).'}, 'visibility': {'ispublic': 1, 'isfriend': 0, 'isfamily': 0}, 'dates': {'posted': '1328033443', 'taken': '2000-12-01 00:00:00', 'takengranularity': '4', 'takenunknown': 0, 'lastupdate': '1348236375'}, 'views': '438', 'editability': {'cancomment': 0, 'canaddmeta': 0}, 'publiceditability': {'cancomment': 1, 'canaddmeta': 0}, 'usage': {'candownload': 0, 'canblog': 0, 'canprint': 0, 'canshare': 0}, 'comments': {'_content': '0'}, 'notes': {'note': []}, 'people': {'haspeople': 0}, 'tags': {'tag': [{'id': '62513739-6796778203-201512', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '620', '_content': '620', 'machine_tag': 0}, {'id': '62513739-6796778203-19713', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'roll', '_content': 'roll', 'machine_tag': 0}, {'id': '62513739-6796778203-664', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'film', '_content': 'film', 'machine_tag': 0}, {'id': '62513739-6796778203-19853', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'brownie', '_content': 'brownie', 'machine_tag': 0}, {'id': '62513739-6796778203-4689', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'kodak', '_content': 'kodak', 'machine_tag': 0}, {'id': '62513739-6796778203-4845', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'box', '_content': 'box', 'machine_tag': 0}, {'id': '62513739-6796778203-368625', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'box brownie', '_content': 
'boxbrownie', 'machine_tag': 0}, {'id': '62513739-6796778203-545', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'colour', '_content': 'colour', 'machine_tag': 0}, {'id': '62513739-6796778203-544', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'color', '_content': 'color', 'machine_tag': 0}, {'id': '62513739-6796778203-1606', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'print', '_content': 'print', 'machine_tag': 0}, {'id': '62513739-6796778203-452117', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'c41', '_content': 'c41', 'machine_tag': 0}, {'id': '62513739-6796778203-867', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'girl', '_content': 'girl', 'machine_tag': 0}, {'id': '62513739-6796778203-4461', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'wind', '_content': 'wind', 'machine_tag': 0}, {'id': '62513739-6796778203-347', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'woman', '_content': 'woman', 'machine_tag': 0}, {'id': '62513739-6796778203-7363', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'young', '_content': 'young', 'machine_tag': 0}, {'id': '62513739-6796778203-1504', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'hair', '_content': 'hair', 'machine_tag': 0}, {'id': '62513739-6796778203-344832', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'blowy', '_content': 'blowy', 'machine_tag': 0}, {'id': '62513739-6796778203-158277', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'gust', '_content': 'gust', 'machine_tag': 0}, {'id': '62513739-6796778203-1804', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'outside', '_content': 'outside', 'machine_tag': 0}, {'id': '62513739-6796778203-1860', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'outdoors', '_content': 'outdoors', 'machine_tag': 0}, {'id': '62513739-6796778203-11117', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'diagonal', '_content': 'diagonal', 
'machine_tag': 0}, {'id': '62513739-6796778203-3360', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'cornwall', '_content': 'cornwall', 'machine_tag': 0}, {'id': '62513739-6796778203-4757', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'united kingdom', '_content': 'unitedkingdom', 'machine_tag': 0}, {'id': '62513739-6796778203-110', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'uk', '_content': 'uk', 'machine_tag': 0}, {'id': '62513739-6796778203-279', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'england', '_content': 'england', 'machine_tag': 0}, {'id': '62513739-6796778203-8461', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'windy', '_content': 'windy', 'machine_tag': 0}, {'id': '62513739-6796778203-181776', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'blustery', '_content': 'blustery', 'machine_tag': 0}, {'id': '62513739-6796778203-608', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'english', '_content': 'english', 'machine_tag': 0}, {'id': '62513739-6796778203-936', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'weather', '_content': 'weather', 'machine_tag': 0}, {'id': '62513739-6796778203-93', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'camera', '_content': 'camera', 'machine_tag': 0}, {'id': '62513739-6796778203-1386', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'vintage', '_content': 'vintage', 'machine_tag': 0}, {'id': '62513739-6796778203-15685', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'frame', '_content': 'frame', 'machine_tag': 0}, {'id': '62513739-6796778203-12613', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'negative', '_content': 'negative', 'machine_tag': 0}, {'id': '62513739-6796778203-2703', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'code', '_content': 'code', 'machine_tag': 0}, {'id': '62513739-6796778203-22593', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '100', 
'_content': '100', 'machine_tag': 0}, {'id': '62513739-6796778203-11561', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'number', '_content': 'number', 'machine_tag': 0}, {'id': '62513739-6796778203-22573', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'arrow', '_content': 'arrow', 'machine_tag': 0}, {'id': '62513739-6796778203-1412', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'medium format', '_content': 'mediumformat', 'machine_tag': 0}, {'id': '62513739-6796778203-4844', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'fort', '_content': 'fort', 'machine_tag': 0}, {'id': '62513739-6796778203-745203', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'polhawn', '_content': 'polhawn', 'machine_tag': 0}, {'id': '62513739-6796778203-1006210', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '英國', '_content': '英國', 'machine_tag': 0}, {'id': '62513739-6796778203-490988', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': '英国', '_content': '英国', 'machine_tag': 0}, {'id': '62513739-6796778203-19177', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'angleterre', '_content': 'angleterre', 'machine_tag': 0}, {'id': '62513739-6796778203-4846', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'britain', '_content': 'britain', 'machine_tag': 0}, {'id': '62513739-6796778203-112', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'great britain', '_content': 'greatbritain', 'machine_tag': 0}, {'id': '62513739-6796778203-885', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'face', '_content': 'face', 'machine_tag': 0}, {'id': '62513739-6796778203-99242', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'visage', '_content': 'visage', 'machine_tag': 0}, {'id': '62513739-6796778203-278', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'portrait', '_content': 'portrait', 'machine_tag': 0}, {'id': '62513739-6796778203-2007', 'author': '62559061@N06', 'authorname': 
'Joybot', 'raw': 'one', '_content': 'one', 'machine_tag': 0}, {'id': '62513739-6796778203-1928', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'person', '_content': 'person', 'machine_tag': 0}, {'id': '62513739-6796778203-14578', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'single', '_content': 'single', 'machine_tag': 0}, {'id': '62513739-6796778203-119393', 'author': '62559061@N06', 'authorname': 'Joybot', 'raw': 'individual', '_content': 'individual', 'machine_tag': 0}]}, 'location': {'latitude': '50.323429', 'longitude': '-4.219769', 'accuracy': '16', 'context': '0', 'locality': {'_content': 'Polhawn', 'place_id': 'ur8c.TRSULiH.w', 'woeid': '32216'}, 'county': {'_content': 'Cornwall and Isles of Scilly', 'place_id': '6NMU9K5QULzZZXxaag', 'woeid': '12602181'}, 'region': {'_content': 'England', 'place_id': '2eIY2QFTVr_DwWZNLg', 'woeid': '24554868'}, 'country': {'_content': 'United Kingdom', 'place_id': 'cnffEpdTUb5v258BBA', 'woeid': '23424975'}, 'place_id': 'ur8c.TRSULiH.w', 'woeid': '32216'}, 'geoperms': {'ispublic': 1, 'iscontact': 0, 'isfriend': 0, 'isfamily': 0}, 'urls': {'url': [{'type': 'photopage', '_content': 'https://www.flickr.com/photos/joybot/6796778203/'}]}, 'media': 'photo'}, 'stat': 'ok'}\n" + ] + } + ], + "source": [ + "import urllib.request, json \n", + "with urllib.request.urlopen(flickr_url) as url:\n", + " data = json.loads(url.read().decode())\n", + " print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "with open('/home/adam/Downloads/test.json', 'w') as fp:\n", + " json.dump(data, fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "{'canaddmeta': 0,\n", + " 'canblog': 0,\n", + " 'cancomment': 1,\n", + " 'candownload': 1,\n", + " 'canprint': 0,\n", + " 'canshare': 1,\n", + " 'comments': '0',\n", + " 'dateuploaded': '1193821883',\n", + " 'description': 'Urmas Paet with Tallinn skyline as backdrop.',\n", + " 'editability': {'canaddmeta': 0, 'cancomment': 0},\n", + " 'farm': 3,\n", + " 'id': '1808093328',\n", + " 'isfamily': 0,\n", + " 'isfavorite': 0,\n", + " 'isfriend': 0,\n", + " 'ispublic': 1,\n", + " 'lastupdate': '1291726681',\n", + " 'license': '4',\n", + " 'media': 'photo',\n", + " 'notes': [],\n", + " 'originalformat': 'jpg',\n", + " 'originalsecret': '84b2933747',\n", + " 'owner_id': '16941867@N06',\n", + " 'owner_username': 'Estonian Foreign Ministry',\n", + " 'people': {'haspeople': 0},\n", + " 'posted': '1193821883',\n", + " 'rotation': 0,\n", + " 'safety_level': '0',\n", + " 'secret': '285209b709',\n", + " 'server': '2364',\n", + " 'tag_ids': ['16896545-1808093328-4053146',\n", + " '16896545-1808093328-29495',\n", + " '16896545-1808093328-77084',\n", + " '16896545-1808093328-3363'],\n", + " 'tag_names': ['paet', 'foreign', 'minister', 'estonia'],\n", + " 'taken': '2007-10-18 16:01:14',\n", + " 'takengranularity': '0',\n", + " 'takenunknown': 0,\n", + " 'title': 'Estonian Foreign Minister Urmas Paet',\n", + " 'urls': {'url': [{'text': 'https://www.flickr.com/photos/estonian-foreign-ministry/1808093328/',\n", + " 'type': 'photopage'}]},\n", + " 'views': '1127'}\n" + ] + } + ], + "source": [ + "pprint(info)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "megapixels", + "language": "python", + "name": "megapixels" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} |
