diff options
Diffstat (limited to 'megapixels/notebooks/datasets/sdr')
| -rw-r--r-- | megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb | 265 |
1 files changed, 265 insertions, 0 deletions
# Notebook: megapixels/notebooks/datasets/sdr/sdr_instagram.ipynb
# (reconstructed from the collapsed diff as a percent-format script; kernel: Python 3.6)

# %% [markdown]
# # Social Relationship Domain
#
# - examine Instagram URLs
#
# The annotation file lists images from several sources. This notebook picks
# out the records whose `source` is `instagram` and converts each numeric
# `source_id` into the short code used in `https://instagram.com/p/<code>` URLs.

# %%
# Only the modules this notebook actually uses are imported. The original
# imports cell also pulled in cv2, numpy, pandas, PIL, matplotlib, scipy,
# sklearn, tqdm and the project's `app.*` modules (through a hard-coded
# sys.path entry), none of which were referenced by any cell.
import json
from collections import Counter
from os.path import join

# %%
# Dataset locations (site-specific absolute paths; adjust for your machine).
fp_dir = '/data_store/datasets/people/social_relation_pipa/dataset/'
fp_anno_info = join(fp_dir, 'annotation_image_info.json')

# Digits of the base-64 positional encoding used for the short codes:
# A-Z, a-z, 0-9, '-' and '_', in that order.
SHORTCODE_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
SHORTCODE_INDEX = {letter: value for value, letter in enumerate(SHORTCODE_ALPHABET)}

# (label printed in the summary, value of the `source` field) pairs.
# Labels and order match the original summary cell exactly.
SOURCE_LABELS = (
    ('Instagram', 'instagram'),
    ('bing', 'bing-doctor'),
    ('flickr', 'flickr'),
    ('yfcc100m', 'yfcc100m'),
    ('Bing', 'Bing'),
    ('VG', 'visual_genome'),
)


# %%
def media_id_to_code(media_id):
    """Encode a non-negative integer media id as a base-64 short code.

    :param media_id: (int) numeric media id
    :returns: (str) short code, most significant digit first; the empty
        string when `media_id` is 0 or negative (no digits are emitted)
    """
    base = len(SHORTCODE_ALPHABET)
    short_code = ''
    while media_id > 0:
        media_id, remainder = divmod(media_id, base)
        # Digits come out least-significant first, so prepend each one.
        short_code = SHORTCODE_ALPHABET[remainder] + short_code
    return short_code


def code_to_media_id(short_code):
    """Decode a base-64 short code back into its integer media id.

    :param short_code: (str) code made of characters from SHORTCODE_ALPHABET
    :returns: (int) decoded media id; 0 for an empty string
    :raises ValueError: if `short_code` contains a character outside the alphabet
    """
    base = len(SHORTCODE_ALPHABET)
    media_id = 0
    for letter in short_code:
        if letter not in SHORTCODE_INDEX:
            raise ValueError(f'invalid short code character: {letter!r}')
        media_id = media_id * base + SHORTCODE_INDEX[letter]
    return media_id


# %%
def load_annotations(fp_json):
    """Read the annotation JSON file and return the parsed content.

    :param fp_json: (str) path to the JSON file
    :returns: the object produced by `json.load`
    """
    with open(fp_json, 'r') as fp:
        return json.load(fp)


def unique_sources(annos):
    """List the distinct `source` values in first-seen order.

    :param annos: iterable of dicts that each have a 'source' key
    :returns: (list) distinct source values, ordered by first appearance
    """
    seen = set()
    ordered = []
    for anno in annos:
        src = anno['source']
        if src not in seen:
            seen.add(src)
            ordered.append(src)
    return ordered


def filter_by_source(annos, source):
    """Return the annotation records whose 'source' equals `source`.

    :param annos: iterable of dicts that each have a 'source' key
    :param source: value to match against the 'source' field
    :returns: (list) matching records, in input order
    """
    return [anno for anno in annos if anno['source'] == source]


def count_by_source(annos):
    """Count the annotation records per `source` value in a single pass.

    :param annos: iterable of dicts that each have a 'source' key
    :returns: (collections.Counter) mapping source value -> record count
    """
    return Counter(anno['source'] for anno in annos)


def instagram_url(source_id):
    """Build the post URL for an Instagram annotation's `source_id`.

    :param source_id: (str or int) numeric media id, e.g. '527824347898327040'
    :returns: (str) 'https://instagram.com/p/<short code>'
    """
    media_code = media_id_to_code(int(source_id))
    return f'https://instagram.com/p/{media_code}'


# %% [markdown]
# ## Load JSON annotations

# %%
# Guarded so the helpers above can be imported without touching the dataset;
# inside a notebook kernel `__name__` is '__main__', so these cells still run.
if __name__ == '__main__':
    annos_data = load_annotations(fp_anno_info)
    # The original cell printed len(data), but `data` is never defined in the
    # notebook (NameError on a fresh kernel). Recorded output of the original
    # run: 23995 -- presumably the same list; re-run to confirm.
    print(len(annos_data))

    # %%
    # Recorded output: ['instagram', 'bing-doctor', 'yfcc100m', 'flickr',
    # 'coco_val2014', 'coco_train2014', 'Flickr', 'Bing', 'visual_genome']
    sources = unique_sources(annos_data)
    annos_ig = filter_by_source(annos_data, 'instagram')
    print(sources)

    # %%
    # Recorded output: Instagram: 42, bing: 74, flickr: 2079, yfcc100m: 4340,
    # Bing: 170, VG: 4218. The coco_* and 'Flickr' sources are not summarised,
    # as in the original cell.
    source_counts = count_by_source(annos_data)
    for label, source in SOURCE_LABELS:
        print(f'{label}: {source_counts[source]}')

    # %%
    # Recorded output: {'imgH': 640, 'source': 'instagram',
    # 'bbox': [[62, 266, 325, 639], [224, 219, 564, 636]],
    # 'source_id': '527824347898327040', 'imgW': 640, 'id': 0}
    print(annos_ig[0])

    # %%
    # Recorded output: 42 URLs, from https://instagram.com/p/dTNXKoSDAA
    # to https://instagram.com/p/dW4bG2iHAB
    for anno_ig in annos_ig:
        print(instagram_url(anno_ig['source_id']))
