{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download FFHQ Images\n",
    "\n",
    "Preparation step: read the FFHQ metadata JSON, take the photo ID from each item's `metadata.photo_url`, and write the de-duplicated IDs to a CSV file. No images are downloaded in this notebook.\n",
    "\n",
    "- https://github.com/NVlabs/ffhq-dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "from os.path import join\n",
    "from glob import glob\n",
    "from pathlib import Path\n",
    "import requests\n",
    "import json\n",
    "\n",
    "# tqdm.auto picks the notebook widget when available; tqdm_notebook is deprecated\n",
    "from tqdm.auto import tqdm\n",
    "import pandas as pd\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import sys\n",
    "sys.path.append('/work/megapixels_dev/megapixels')\n",
    "from app.utils import file_utils"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# input: FFHQ metadata JSON; output: CSV with a single photo_id column\n",
    "fp_in = '/data_store/datasets/people/ffhq/ffhq-dataset-v1.json'\n",
    "fp_out = '/data_store/datasets/people/ffhq/research/flickr_api_urls.csv'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read as UTF-8 explicitly instead of relying on the platform default encoding\n",
    "with open(fp_in, 'r', encoding='utf-8') as fp:\n",
    "    ffhq_items = json.load(fp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract photo IDs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# one record per FFHQ item: the photo id is the last path segment of photo_url\n",
    "# (a previous run produced records like {'photo_id': '1133484654'})\n",
    "results = []\n",
    "for ffhq_item in tqdm(ffhq_items.values(), desc='photo ids'):\n",
    "    # index directly so an item without metadata/photo_url fails with a clear KeyError\n",
    "    url = ffhq_item['metadata']['photo_url']\n",
    "    results.append({'photo_id': Path(url).stem})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## De-duplicate and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# explicit columns keep the photo_id header even when results is empty\n",
    "df_photo_ids = pd.DataFrame(results, columns=['photo_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# build a new frame rather than mutating in place, so the unfiltered frame stays available\n",
    "df_photo_ids_unique = df_photo_ids.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_photo_ids_unique.to_csv(fp_out, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "megapixels",
   "language": "python",
   "name": "megapixels"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}