def fetch_spreadsheet(docid="1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc", fp_creds=None):
  """Open the Google Spreadsheet, which contains the individual worksheets.

  Args:
    docid: key of the Google Spreadsheet to open. Defaults to the document
      this notebook has always used.
    fp_creds: path to the service-account JSON key file. When None, the key
      file under `app_cfg.DIR_ROOT` is used.

  Returns:
    The spreadsheet object returned by `client.open_by_key(docid)`.
  """
  # Imported lazily so the row-shaping helpers below stay importable (and
  # testable) without the Google client libraries or the project settings.
  from os.path import join
  import gspread
  from oauth2client.service_account import ServiceAccountCredentials

  if fp_creds is None:
    from app.settings import app_cfg
    fp_creds = join(app_cfg.DIR_ROOT, 'scraper/.creds/Megapixels-ef28f91112a9.json')

  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
  credentials = ServiceAccountCredentials.from_json_keyfile_name(fp_creds, scope)
  client = gspread.authorize(credentials)
  return client.open_by_key(docid)


def fetch_worksheet(name="institutions"):
  """Get a reference to a particular "worksheet" from the Google Spreadsheet."""
  return fetch_spreadsheet().worksheet(name)


def fetch_google_sheet(name="institutions"):
  """Get all the values from a particular worksheet as a list of lists.

  Returns:
    (keys, lines) where `keys` is the first row of the worksheet and `lines`
    is a list of lists with the remaining rows. An empty worksheet yields
    ([], []) instead of raising IndexError.
  """
  rows = fetch_worksheet(name).get_all_values()
  if not rows:
    return [], []
  return rows[0], rows[1:]


def _row_to_record(keys, row):
  """Map one worksheet row onto the header names.

  Cells missing from a short row are filled with '' so every record carries
  every header; cells beyond the header are ignored. If a header name is
  repeated, the right-most column wins.
  """
  return {key: (row[index] if index < len(row) else '') for index, key in enumerate(keys)}


def fetch_google_sheet_objects(name):
  """Get all the values from a worksheet as a list of dictionaries."""
  keys, rows = fetch_google_sheet(name)
  return [_row_to_record(keys, row) for row in rows]


def fetch_google_lookup(name, item_key='key'):
  """Get all the values from a worksheet as a dictionary of dictionaries.

  Args:
    name: worksheet name.
    item_key: header whose cell value becomes the dictionary key.

  Returns:
    dict mapping each row's `item_key` value to that row's record. The
    lookup key has surrounding whitespace stripped, and rows whose key cell
    is blank are skipped; the stored record itself is left untouched. When
    two rows share a key, the later row wins.

  Raises:
    KeyError: if the worksheet has data rows but no `item_key` column.
  """
  keys, rows = fetch_google_sheet(name)
  if rows and item_key not in keys:
    raise KeyError('column %r not found in worksheet %r' % (item_key, name))

  lookup = {}
  for row in rows:
    rec = _row_to_record(keys, row)
    lookup_key = rec[item_key]
    if isinstance(lookup_key, str):
      lookup_key = lookup_key.strip()
    if lookup_key == '':
      # Spacer / blank rows would otherwise all collapse onto the '' key.
      continue
    lookup[lookup_key] = rec
  return lookup
# Driver cells: pull the worksheets and inspect them.
# Written to run top to bottom on a fresh kernel; every name used below is
# defined in this section.
from pprint import pprint

import pandas as pd

# Worksheet used to build the key -> record lookup.
# NOTE(review): inferred from the recorded output of `sheet['lfw']`, whose
# fields match the 'datasets' worksheet columns -- confirm.
opt_spreadsheet = 'datasets'

sheet_datasets = fetch_google_sheet_objects(name='datasets')
sheet_stats = fetch_google_sheet_objects(name='statistics')

# First record of the statistics worksheet.
pprint(sheet_stats[0])

# NOTE(review): `df_datasets` was never defined in the notebook; the recorded
# column index matches the 'datasets' records, so it is rebuilt from them here.
df_datasets = pd.DataFrame(sheet_datasets)
print(df_datasets.keys())

sheet = fetch_google_lookup(name=opt_spreadsheet)
print(sheet['lfw'])

# List every dataset key present in the lookup.
for k in sheet:
  print(k)