1 files changed, 265 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb b/megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb
new file mode 100644
index 00000000..f10a7032
--- /dev/null
+++ b/megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb
@@ -0,0 +1,265 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Social Relationship Domain\n",
+    "\n",
+    "- Reconstruct and examine the Instagram post URLs of annotations whose source is `instagram`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%reload_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import os\n",
+    "from os.path import join\n",
+    "import math\n",
+    "from glob import glob\n",
+    "from random import randint\n",
+    "import json\n",
+    "from pprint import pprint\n",
+    "\n",
+    "import cv2 as cv\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from PIL import Image, ImageDraw\n",
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "import scipy.io\n",
+    "from pathlib import Path\n",
+    "from sklearn import preprocessing\n",
+    "from tqdm import tqdm_notebook as tqdm\n",
+    "\n",
+    "import sys\n",
+    "sys.path.append('/work/megapixels_dev/megapixels/')\n",
+    "from app.settings import app_cfg as cfg\n",
+    "from app.utils import file_utils, im_utils\n",
+    "from app.models.bbox import BBox"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dataset root folder; the annotation file sits directly inside it\n",
+    "fp_dir = '/data_store/datasets/people/social_relation_pipa/dataset/'\n",
+    "fp_anno_info = join(fp_dir, 'annotation_image_info.json')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Digits of the base-64 positional encoding used for shortcodes, in value order (A=0 ... _=63)\n",
+    "SHORTCODE_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'\n",
+    "\n",
+    "def media_id_to_code(media_id):\n",
+    "  \"\"\"Encode a non-negative integer media ID as a base-64 shortcode.\n",
+    "\n",
+    "  :param media_id: (int) numeric media ID, must be >= 0\n",
+    "  :returns: (str) shortcode, most significant digit first; 0 encodes as 'A'\n",
+    "  :raises ValueError: if media_id is negative\n",
+    "  \"\"\"\n",
+    "  if media_id < 0:\n",
+    "    raise ValueError(f'media_id must be non-negative, got {media_id}')\n",
+    "  if media_id == 0:\n",
+    "    # the loop below emits no digit for zero, so return the zero digit explicitly\n",
+    "    return SHORTCODE_ALPHABET[0]\n",
+    "  base = len(SHORTCODE_ALPHABET)\n",
+    "  digits = []\n",
+    "  while media_id > 0:\n",
+    "    media_id, remainder = divmod(media_id, base)\n",
+    "    digits.append(SHORTCODE_ALPHABET[remainder])\n",
+    "  # digits were collected least significant first\n",
+    "  return ''.join(reversed(digits))\n",
+    "\n",
+    "def code_to_media_id(short_code):\n",
+    "  \"\"\"Decode a base-64 shortcode back into its integer media ID.\n",
+    "\n",
+    "  :param short_code: (str) shortcode made of characters from SHORTCODE_ALPHABET\n",
+    "  :returns: (int) decoded media ID; an empty string decodes to 0\n",
+    "  :raises ValueError: if short_code contains a character outside the alphabet\n",
+    "  \"\"\"\n",
+    "  base = len(SHORTCODE_ALPHABET)\n",
+    "  media_id = 0\n",
+    "  for letter in short_code:\n",
+    "    digit = SHORTCODE_ALPHABET.find(letter)\n",
+    "    if digit < 0:\n",
+    "      raise ValueError(f'invalid shortcode character {letter!r} in {short_code!r}')\n",
+    "    media_id = media_id * base + digit\n",
+    "  return media_id"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load JSON annotations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "23995\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Parse the annotation JSON and report how many top-level entries it holds\n",
+    "with open(fp_anno_info, 'r') as fp:\n",
+    "  annos_data = json.load(fp)\n",
+    "print(len(annos_data))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['instagram', 'bing-doctor', 'yfcc100m', 'flickr', 'coco_val2014', 'coco_train2014', 'Flickr', 'Bing', 'visual_genome']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Distinct values of the 'source' field, in order of first appearance\n",
+    "sources = []\n",
+    "for anno_data in annos_data:\n",
+    "  if anno_data['source'] not in sources:\n",
+    "    sources.append(anno_data['source'])\n",
+    "# Keep only the annotations whose source is Instagram\n",
+    "annos_ig = [anno for anno in annos_data if anno['source'] == 'instagram']\n",
+    "print(sources)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Instagram: 42\n",
+      "bing: 74\n",
+      "flickr: 2079\n",
+      "yfcc100m: 4340\n",
+      "Bing: 170\n",
+      "VG: 4218\n"
+     ]
+    }
+   ],
+   "source": [
+    "# (label to print, value of the 'source' field to count); some labels intentionally differ from the source name\n",
+    "source_labels = [\n",
+    "  ('Instagram', 'instagram'),\n",
+    "  ('bing', 'bing-doctor'),\n",
+    "  ('flickr', 'flickr'),\n",
+    "  ('yfcc100m', 'yfcc100m'),\n",
+    "  ('Bing', 'Bing'),\n",
+    "  ('VG', 'visual_genome'),\n",
+    "]\n",
+    "for label, source_name in source_labels:\n",
+    "  n_annos = sum(1 for anno_data in annos_data if anno_data['source'] == source_name)\n",
+    "  print(f'{label}: {n_annos}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'imgH': 640, 'source': 'instagram', 'bbox': [[62, 266, 325, 639], [224, 219, 564, 636]], 'source_id': '527824347898327040', 'imgW': 640, 'id': 0}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the first Instagram annotation to inspect which fields a record carries\n",
+    "print(annos_ig[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "https://instagram.com/p/dTNXKoSDAA\n",
+      "https://instagram.com/p/dTXS1qSIAA\n",
+      "https://instagram.com/p/dT5EOOyCAA\n",
+      "https://instagram.com/p/dT8Sp_yDAB\n",
+      "https://instagram.com/p/dT9JCAQhAA\n",
+      "https://instagram.com/p/dT_SgSSCAA\n",
+      "https://instagram.com/p/dUA4XdCCAA\n",
+      "https://instagram.com/p/dUCxd_SFAA\n",
+      "https://instagram.com/p/dUHNmxSCAA\n",
+      "https://instagram.com/p/dUSSRTQkAB\n",
+      "https://instagram.com/p/dUXnSJwjAE\n",
+      "https://instagram.com/p/dUXw23AhAB\n",
+      "https://instagram.com/p/dUY0EAQkAA\n",
+      "https://instagram.com/p/dUZQeMAhAB\n",
+      "https://instagram.com/p/dUcEquSCAA\n",
+      "https://instagram.com/p/dUcI0tAjAB\n",
+      "https://instagram.com/p/dUcgMcAjAA\n",
+      "https://instagram.com/p/dUdzB6AkAC\n",
+      "https://instagram.com/p/dUhFAgiCAA\n",
+      "https://instagram.com/p/dVA5PJCGAA\n",
+      "https://instagram.com/p/dVA_qoAjAA\n",
+      "https://instagram.com/p/dVDQq4CCAA\n",
+      "https://instagram.com/p/dVSYfGQmAA\n",
+      "https://instagram.com/p/dVXL1JiHAA\n",
+      "https://instagram.com/p/dVXuiqyEAA\n",
+      "https://instagram.com/p/dVezqxiFAA\n",
+      "https://instagram.com/p/dVfFhNyCAB\n",
+      "https://instagram.com/p/dVkZddAgAA\n",
+      "https://instagram.com/p/dVk0IqCAAA\n",
+      "https://instagram.com/p/dVlXCpiEAB\n",
+      "https://instagram.com/p/dVlZVoCEAB\n",
+      "https://instagram.com/p/dV0hRtSGAA\n",
+      "https://instagram.com/p/dV2HchSFAA\n",
+      "https://instagram.com/p/dV2SrliIAC\n",
+      "https://instagram.com/p/dV25bbSGAA\n",
+      "https://instagram.com/p/dV3umuCGAA\n",
+      "https://instagram.com/p/dV8XH4SGAA\n",
+      "https://instagram.com/p/dWjs8FwnAA\n",
+      "https://instagram.com/p/dWkMfZyEAA\n",
+      "https://instagram.com/p/dWzrJniBAB\n",
+      "https://instagram.com/p/dW30c4yDAA\n",
+      "https://instagram.com/p/dW4bG2iHAB\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Turn each annotation's numeric source_id into a shortcode and print the post URL built from it\n",
+    "for media_code in (media_id_to_code(int(anno_ig['source_id'])) for anno_ig in annos_ig):\n",
+    "  url = f'https://instagram.com/p/{media_code}'\n",
+    "  print(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "megapixels",
+   "language": "python",
+   "name": "megapixels"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}