{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Count IJB sources\n",
    "\n",
    "Counts the media URLs listed in an IJB source CSV by hosting site (flickr.com, youtube.com, wikipedia.org, wikimedia.org) and collects every other non-empty URL in `others`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "from os.path import join\n",
    "from glob import glob\n",
    "from pathlib import Path\n",
    "import requests\n",
    "import json\n",
    "\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "import pandas as pd\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import sys\n",
    "sys.path.append('/work/megapixels_dev/megapixels')\n",
    "from app.utils import file_utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paths to the IJB source/license CSV files (inputs) and a summary CSV path.\n",
    "# NOTE(review): fp_out is defined here but no cell in this notebook writes to it.\n",
    "fp_in_cs3 = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv'\n",
    "fp_in_cs4 = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs4_media.csv'\n",
    "fp_in_ijb_b = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-B/ijbb_licenses_and_sources.csv'\n",
    "fp_in_ijb_a = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-A/SOURCES.csv'\n",
    "fp_out = '/data_store_hdd/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/summary.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# IJB-C: combine the cs3 and cs4 media listings into a single frame.\n",
    "df_cs3 = pd.read_csv(fp_in_cs3)\n",
    "df_cs4 = pd.read_csv(fp_in_cs4)\n",
    "# pd.concat is used instead of DataFrame.append, which was removed in pandas 2.0;\n",
    "# missing values become '' so the string checks further down do not see NaN.\n",
    "df_sources = pd.concat([df_cs3, df_cs4]).fillna('')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this cell replaces the IJB-C frame built in the previous cell, so a\n",
    "# top-to-bottom run counts whichever file is loaded here.\n",
    "# ijb b\n",
    "#df_sources = pd.read_csv(fp_in_ijb_b).fillna('')\n",
    "# ijb a\n",
    "df_sources = pd.read_csv(fp_in_ijb_a).fillna('')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one dict per CSV row, keyed by column name\n",
    "sources = df_sources.to_dict('records')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bucket every media URL by the host substring it contains; non-empty URLs that\n",
    "# match none of the known hosts are collected in `others`.\n",
    "keys = ['flickr.com', 'youtube.com', 'wikipedia.org', 'wikimedia.org']\n",
    "results = {k: [] for k in keys}\n",
    "others = []\n",
    "for source in sources:\n",
    "    url = str(source['Media URL'])\n",
    "    # str() keeps the substring test from raising if the ID column is not text\n",
    "    media_id = str(source['Media ID'])\n",
    "    if 'nonfaces' in media_id:\n",
    "        continue\n",
    "    found = False\n",
    "    # no break: a URL containing several host substrings is added to each bucket\n",
    "    for k in keys:\n",
    "        if k in url:\n",
    "            results[k].append(url)\n",
    "            found = True\n",
    "    # rows with an empty URL (missing values were filled with '') are skipped\n",
    "    if not found and url:\n",
    "        others.append(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "flickr.com 0\n",
      "youtube.com 1388\n",
      "wikipedia.org 0\n",
      "wikimedia.org 4298\n"
     ]
    }
   ],
   "source": [
    "# number of distinct URLs per host\n",
    "for k,v in results.items():\n",
    "    print(k, len(set(v)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "siliconangle.com/files/2011/06/kaz-hirai.jpg\n",
      "etnosi.files.wordpress.com/2012/05/sofi-marinova-baku.jpg\n",
      "images.coveralia.com/audio/p/Pia_Zadora-When_The_Lights_Go_Out-Interior_Frontal.jpg\n",
      "4.bp.blogspot.com/-TFHOJVIW3a8/T_1mD6MdOxI/AAAAAAAADAg/PhKDPx0Aqu0/s1600/ivan_pavlov.jpg\n",
      "863793661388437597-a-1802744773732722657-s-sites.googlegroups.com/site/virginmarysite/Home/jackneosex.jpg\n",
      "amckiereads.files.wordpress.com/2010/12/darwish.jpg?w=600\n",
      "img.interia.pl/komputery/nimg/5/7/Kazuo_Hirai_plan_odbudowe_5726348.jpg\n",
      "2.bp.blogspot.com/-JAYvKsHcQPI/T4f3wbCIMDI/AAAAAAAAFDM/lTs3uKlb3A0/s1600/deeksha_seth_launches_chandana_brothers_showroom_Yellow+Saree+smiling+pics+%25285%2529.jpg\n",
      "1.bp.blogspot.com/-D3SI27GS7-g/U-iD5fPcFDI/AAAAAAAABOs/VaB_BRRa6OU/s320/news8.jpg\n",
      "1.bp.blogspot.com/_ilOjS7A_kk4/SVGCtcyAAmI/AAAAAAAAAH4/9-KKBqYeDBA/s400/playstation-3-grill_12.jpg\n"
     ]
    }
   ],
   "source": [
    "# peek at the first few URLs that matched none of the known hosts\n",
    "for other in others[:10]:\n",
    "    print(other)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21319"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total URL count (duplicates included) for flickr + wikimedia + others.\n",
    "# NOTE(review): youtube.com and wikipedia.org entries are not part of this sum - confirm that is intended.\n",
    "len(results['flickr.com']) +len(results['wikimedia.org']) + len(others)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "megapixels",
   "language": "python",
   "name": "megapixels"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}