diff options
Diffstat (limited to 'megapixels/notebooks/datasets/ijb_count/ibj_count.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/ijb_count/ibj_count.ipynb | 206 |
1 file changed, 206 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/ijb_count/ibj_count.ipynb b/megapixels/notebooks/datasets/ijb_count/ibj_count.ipynb new file mode 100644 index 00000000..b4a29243 --- /dev/null +++ b/megapixels/notebooks/datasets/ijb_count/ibj_count.ipynb @@ -0,0 +1,206 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Count IJB sources" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import os\n", + "from os.path import join\n", + "from glob import glob\n", + "from pathlib import Path\n", + "import requests\n", + "import json\n", + "\n", + "from tqdm import tqdm_notebook as tqdm\n", + "import pandas as pd\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import sys\n", + "sys.path.append('/work/megapixels_dev/megapixels')\n", + "from app.utils import file_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# file paths: input CSVs and output summary\n", + "fp_in_cs3 = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv'\n", + "fp_in_cs4 = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs4_media.csv'\n", + "fp_in_ijb_b = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-B/ijbb_licenses_and_sources.csv'\n", + "fp_in_ijb_a = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-A/SOURCES.csv'\n", + "fp_out = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/summary.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "df_cs3 = pd.read_csv(fp_in_cs3)\n", + "df_cs4 = pd.read_csv(fp_in_cs4)\n", + "df_sources = df_cs3.append(df_cs4)\n", + "df_sources.fillna('', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# ijb b\n", + "#df_sources = pd.read_csv(fp_in_ijb_b).fillna('')\n", + "# ijb a\n", + "df_sources = pd.read_csv(fp_in_ijb_a).fillna('')" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "sources = df_sources.to_dict('records')" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "results = {}\n", + "others = []\n", + "keys = ['flickr.com', 'youtube.com', 'wikipedia.org', 'wikimedia.org']\n", + "for k in keys:\n", + " results[k] = []\n", + "for source in sources:\n", + " url = str(source['Media URL'])\n", + " media_id = source['Media ID']\n", + " if 'nonfaces' in media_id:\n", + " continue\n", + " found = False\n", + " for k in keys:\n", + " if k in url:\n", + " results[k].append(url)\n", + " found = True\n", + " if not found:\n", + " if url:\n", + " others.append(url)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "flickr.com 0\n", + "youtube.com 1388\n", + "wikipedia.org 0\n", + "wikimedia.org 4298\n" + ] + } + ], + "source": [ + "for k,v in results.items():\n", + " print(k, len(set(v)))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "siliconangle.com/files/2011/06/kaz-hirai.jpg\n", + "etnosi.files.wordpress.com/2012/05/sofi-marinova-baku.jpg\n", + "images.coveralia.com/audio/p/Pia_Zadora-When_The_Lights_Go_Out-Interior_Frontal.jpg\n", + "4.bp.blogspot.com/-TFHOJVIW3a8/T_1mD6MdOxI/AAAAAAAADAg/PhKDPx0Aqu0/s1600/ivan_pavlov.jpg\n", + "863793661388437597-a-1802744773732722657-s-sites.googlegroups.com/site/virginmarysite/Home/jackneosex.jpg\n", + "amckiereads.files.wordpress.com/2010/12/darwish.jpg?w=600\n", + "img.interia.pl/komputery/nimg/5/7/Kazuo_Hirai_plan_odbudowe_5726348.jpg\n", +
"2.bp.blogspot.com/-JAYvKsHcQPI/T4f3wbCIMDI/AAAAAAAAFDM/lTs3uKlb3A0/s1600/deeksha_seth_launches_chandana_brothers_showroom_Yellow+Saree+smiling+pics+%25285%2529.jpg\n", + "1.bp.blogspot.com/-D3SI27GS7-g/U-iD5fPcFDI/AAAAAAAABOs/VaB_BRRa6OU/s320/news8.jpg\n", + "1.bp.blogspot.com/_ilOjS7A_kk4/SVGCtcyAAmI/AAAAAAAAAH4/9-KKBqYeDBA/s400/playstation-3-grill_12.jpg\n" + ] + } + ], + "source": [ + "for other in others[:10]:\n", + " print(other)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "21319" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(results['flickr.com']) +len(results['wikimedia.org']) + len(others)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "megapixels", + "language": "python", + "name": "megapixels" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} |
