{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Who Goes There: Prepare Flickr API Metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "from os.path import join\n",
    "from glob import glob, iglob\n",
    "from pathlib import Path\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "\n",
    "import h5py\n",
    "from scipy import misc\n",
    "from io import BytesIO\n",
    "from base64 import b64decode\n",
    "\n",
    "from PIL import Image, ImageDraw\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.io as sio\n",
    "import h5py\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "fp_in = '/data_store/datasets/people/who_goes_there/downloads/whogoesthere_dataset.hdf5'\n",
    "fp_out = '/data_store/datasets/people/who_goes_there/research/wgt_flickr_meta_base.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "keys_all = ['accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city', \n",
    "  'content_length', 'country_code', 'date_taken', 'date_uploaded', \n",
    "  'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f', \n",
    "  'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original', \n",
    "  'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original', \n",
    "  'im_server_id', 'index', 'latitude', 'license_name', 'license_url', 'longitude', \n",
    "  'machine_tags', 'title', 'user_nickname', 'user_nsid', 'user_tags']\n",
    "\n",
    "keys_keep = ['accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city', \n",
    "  'content_length', 'country_code', 'date_taken', 'date_uploaded', \n",
    "  'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f', \n",
    "  'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original', \n",
    "  'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original', \n",
    "  'im_server_id', 'index', 'latitude', 'license_name', 'license_url', 'longitude', \n",
    "  'machine_tags', 'title', 'user_nickname', 'user_nsid', 'user_tags']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<KeysViewHDF5 ['accuracy', 'admin1', 'admin2', 'age', 'capture_device', 'city', 'content_length', 'country_code', 'date_taken', 'date_uploaded', 'description', 'face', 'face_bounds', 'face_key', 'face_landmarks_f', 'face_landmarks_o', 'gender', 'im_download_url', 'im_extension_original', 'im_farm_id', 'im_id', 'im_page_url', 'im_secret', 'im_secret_original', 'im_server_id', 'index', 'latitude', 'license_name', 'license_url', 'longitude', 'machine_tags', 'title', 'user_nickname', 'user_nsid', 'user_tags']>\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35deca016b57430bbfc84a2f3eefde51",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=2106478), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Build one record per row of the `user_nsid` dataset\n",
    "results = []\n",
    "with h5py.File(fp_in, 'r') as fp:\n",
    "    print(fp.keys())\n",
    "    print('')\n",
    "    # Resolve each dataset once rather than on every loop iteration\n",
    "    nsids = fp.get('user_nsid')\n",
    "    photo_ids = fp.get('im_id')\n",
    "    urls = fp.get('im_download_url')\n",
    "    nicknames = fp.get('user_nickname')\n",
    "    n_rows = len(nsids)\n",
    "    # Rows are read in lockstep, so every column must have the same length\n",
    "    for ds in (photo_ids, urls, nicknames):\n",
    "        assert len(ds) == n_rows, 'dataset lengths differ'\n",
    "    for i in tqdm(range(n_rows), total=n_rows):\n",
    "        results.append({\n",
    "            'nsid': nsids[i].decode(),\n",
    "            'photo_id': photo_ids[i],\n",
    "            'url': urls[i].decode(),\n",
    "            # Index by i (not 0) so each row gets its own nickname\n",
    "            'nickname': nicknames[i].decode(),\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the collected records and write them out without the index column\n",
    "df = pd.DataFrame(results)\n",
    "df.to_csv(path_or_buf=fp_out, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_flickr_meta_ext = '/data_store/datasets/people/who_goes_there/research/wgt_flickr_queries.csv'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert Flickr Queries data to final meta file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "fp_flickr_meta = '/data_store/datasets/people/who_goes_there/research/wgt_flickr_queries.csv'\n",
    "fp_meta_out = '/data_store/datasets/people/who_goes_there/research/who_goes_there_flickr_meta.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Flickr query results into a DataFrame\n",
    "df_flickr_meta = pd.read_csv(filepath_or_buffer=fp_flickr_meta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bucket the query rows by their `nsid` column\n",
    "groups = df_flickr_meta.groupby(by='nsid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One record per nsid, carrying the number of rows in its group\n",
    "results = [\n",
    "  {'nsid': group_key, 'count': len(group_rows)}\n",
    "  for group_key, group_rows in groups\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the per-nsid counts to the final meta CSV, without the index column\n",
    "pd.DataFrame(results).to_csv(path_or_buf=fp_meta_out, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "megapixels",
   "language": "python",
   "name": "megapixels"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}