summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/sdr
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/sdr')
-rw-r--r--megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb265
1 files changed, 265 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb b/megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb
new file mode 100644
index 00000000..f10a7032
--- /dev/null
+++ b/megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb
@@ -0,0 +1,265 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Social Relationship Domain\n",
+ "\n",
+ "- Examine the Instagram-sourced annotations and rebuild their post URLs from the numeric `source_id`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import os\n",
+ "from os.path import join\n",
+ "import math\n",
+ "from glob import glob\n",
+ "from random import randint\n",
+ "import json\n",
+ "from pprint import pprint\n",
+ "\n",
+ "import cv2 as cv\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from PIL import Image, ImageDraw\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import scipy.io\n",
+ "from pathlib import Path\n",
+ "from sklearn import preprocessing\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "\n",
+ "import sys\n",
+ "sys.path.append('/work/megapixels_dev/megapixels/')\n",
+ "from app.settings import app_cfg as cfg\n",
+ "from app.utils import file_utils, im_utils\n",
+ "from app.models.bbox import BBox"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Root directory of the dataset\n",
+ "fp_dir = '/data_store/datasets/people/social_relation_pipa/dataset/'\n",
+ "# Annotation index; derived from fp_dir so the two paths cannot drift apart\n",
+ "fp_anno_info = join(fp_dir, 'annotation_image_info.json')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def media_id_to_code(media_id):\n",
+ "    \"\"\"Encode a numeric media ID as a base-64 shortcode string.\n",
+ "\n",
+ "    The ID is written most-significant digit first using the 64-character\n",
+ "    alphabet A-Z, a-z, 0-9, '-', '_'.\n",
+ "\n",
+ "    :param media_id: non-negative integer media ID\n",
+ "    :returns: shortcode string (0 encodes to 'A')\n",
+ "    :raises ValueError: if media_id is negative\n",
+ "    \"\"\"\n",
+ "    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'\n",
+ "    if media_id < 0:\n",
+ "        raise ValueError(f'media_id must be non-negative, got {media_id}')\n",
+ "    if media_id == 0:\n",
+ "        # zero still has one digit; an empty shortcode would give a broken URL\n",
+ "        return alphabet[0]\n",
+ "    digits = []\n",
+ "    while media_id > 0:\n",
+ "        media_id, remainder = divmod(media_id, 64)\n",
+ "        # digits come out least-significant first; reversed on return\n",
+ "        digits.append(alphabet[remainder])\n",
+ "    return ''.join(reversed(digits))\n",
+ "\n",
+ "def code_to_media_id(short_code):\n",
+ "    \"\"\"Decode a base-64 shortcode string back into its numeric media ID.\n",
+ "\n",
+ "    Inverse of media_id_to_code: characters are read most-significant\n",
+ "    digit first from the alphabet A-Z, a-z, 0-9, '-', '_'.\n",
+ "\n",
+ "    :param short_code: shortcode string; an empty string decodes to 0\n",
+ "    :returns: integer media ID\n",
+ "    :raises ValueError: if short_code contains a character outside the alphabet\n",
+ "    \"\"\"\n",
+ "    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'\n",
+ "    media_id = 0\n",
+ "    for letter in short_code:\n",
+ "        digit = alphabet.find(letter)\n",
+ "        if digit < 0:\n",
+ "            # name the offending character instead of 'substring not found'\n",
+ "            raise ValueError(f'invalid shortcode character {letter!r} in {short_code!r}')\n",
+ "        media_id = media_id * 64 + digit\n",
+ "    return media_id"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load JSON annotations and filter Instagram entries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "23995\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Read the annotation index (a JSON list of per-image records)\n",
+ "with open(fp_anno_info, 'r') as fp:\n",
+ "    annos_data = json.load(fp)\n",
+ "# Report how many records were loaded; `data` was never defined in this notebook\n",
+ "print(len(annos_data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['instagram', 'bing-doctor', 'yfcc100m', 'flickr', 'coco_val2014', 'coco_train2014', 'Flickr', 'Bing', 'visual_genome']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Distinct 'source' values, kept in the order they first appear\n",
+ "sources = []\n",
+ "for anno_data in annos_data:\n",
+ "    src = anno_data['source']\n",
+ "    if src not in sources:\n",
+ "        sources.append(src)\n",
+ "\n",
+ "# Annotations whose image came from Instagram\n",
+ "annos_ig = [anno_data for anno_data in annos_data if anno_data['source'] == 'instagram']\n",
+ "print(sources)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Instagram: 42\n",
+ "bing: 74\n",
+ "flickr: 2079\n",
+ "yfcc100m: 4340\n",
+ "Bing: 170\n",
+ "VG: 4218\n"
+ ]
+ }
+ ],
+ "source": [
+ "# (printed label, value of the 'source' field) pairs, in output order\n",
+ "source_labels = [\n",
+ "    ('Instagram', 'instagram'),\n",
+ "    ('bing', 'bing-doctor'),\n",
+ "    ('flickr', 'flickr'),\n",
+ "    ('yfcc100m', 'yfcc100m'),\n",
+ "    ('Bing', 'Bing'),\n",
+ "    ('VG', 'visual_genome'),\n",
+ "]\n",
+ "for label, source_name in source_labels:\n",
+ "    n_annos = sum(1 for anno_data in annos_data if anno_data['source'] == source_name)\n",
+ "    print(f'{label}: {n_annos}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'imgH': 640, 'source': 'instagram', 'bbox': [[62, 266, 325, 639], [224, 219, 564, 636]], 'source_id': '527824347898327040', 'imgW': 640, 'id': 0}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Show one Instagram record to see which fields an annotation carries\n",
+ "first_ig_anno = annos_ig[0]\n",
+ "print(first_ig_anno)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "https://instagram.com/p/dTNXKoSDAA\n",
+ "https://instagram.com/p/dTXS1qSIAA\n",
+ "https://instagram.com/p/dT5EOOyCAA\n",
+ "https://instagram.com/p/dT8Sp_yDAB\n",
+ "https://instagram.com/p/dT9JCAQhAA\n",
+ "https://instagram.com/p/dT_SgSSCAA\n",
+ "https://instagram.com/p/dUA4XdCCAA\n",
+ "https://instagram.com/p/dUCxd_SFAA\n",
+ "https://instagram.com/p/dUHNmxSCAA\n",
+ "https://instagram.com/p/dUSSRTQkAB\n",
+ "https://instagram.com/p/dUXnSJwjAE\n",
+ "https://instagram.com/p/dUXw23AhAB\n",
+ "https://instagram.com/p/dUY0EAQkAA\n",
+ "https://instagram.com/p/dUZQeMAhAB\n",
+ "https://instagram.com/p/dUcEquSCAA\n",
+ "https://instagram.com/p/dUcI0tAjAB\n",
+ "https://instagram.com/p/dUcgMcAjAA\n",
+ "https://instagram.com/p/dUdzB6AkAC\n",
+ "https://instagram.com/p/dUhFAgiCAA\n",
+ "https://instagram.com/p/dVA5PJCGAA\n",
+ "https://instagram.com/p/dVA_qoAjAA\n",
+ "https://instagram.com/p/dVDQq4CCAA\n",
+ "https://instagram.com/p/dVSYfGQmAA\n",
+ "https://instagram.com/p/dVXL1JiHAA\n",
+ "https://instagram.com/p/dVXuiqyEAA\n",
+ "https://instagram.com/p/dVezqxiFAA\n",
+ "https://instagram.com/p/dVfFhNyCAB\n",
+ "https://instagram.com/p/dVkZddAgAA\n",
+ "https://instagram.com/p/dVk0IqCAAA\n",
+ "https://instagram.com/p/dVlXCpiEAB\n",
+ "https://instagram.com/p/dVlZVoCEAB\n",
+ "https://instagram.com/p/dV0hRtSGAA\n",
+ "https://instagram.com/p/dV2HchSFAA\n",
+ "https://instagram.com/p/dV2SrliIAC\n",
+ "https://instagram.com/p/dV25bbSGAA\n",
+ "https://instagram.com/p/dV3umuCGAA\n",
+ "https://instagram.com/p/dV8XH4SGAA\n",
+ "https://instagram.com/p/dWjs8FwnAA\n",
+ "https://instagram.com/p/dWkMfZyEAA\n",
+ "https://instagram.com/p/dWzrJniBAB\n",
+ "https://instagram.com/p/dW30c4yDAA\n",
+ "https://instagram.com/p/dW4bG2iHAB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Turn each Instagram annotation's numeric source_id into a post URL\n",
+ "ig_media_ids = (int(anno_ig['source_id']) for anno_ig in annos_ig)\n",
+ "for ig_media_id in ig_media_ids:\n",
+ "    print('https://instagram.com/p/' + media_id_to_code(ig_media_id))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}