diff options
Diffstat (limited to 'megapixels/notebooks/datasets/uccs/uccs_exif.ipynb')
| -rw-r--r-- | megapixels/notebooks/datasets/uccs/uccs_exif.ipynb | 421 |
1 files changed, 421 insertions, 0 deletions
# %% [markdown]
# # UCCS Exif
#
# - read in all images and extract metadata
# - export to CSV

# %%
# IPython magics from the notebook cell, kept as comments so that this file
# parses as plain Python.
# %reload_ext autoreload
# %autoreload 2

from os.path import join
from glob import glob
from pprint import pprint

import cv2 as cv
import pandas as pd
from PIL import Image, ImageDraw, ExifTags

from pathlib import Path
from tqdm import tqdm_notebook as tqdm

# %%
fp_dir_uccs = '/data_store/datasets/people/uccs/dataset'
fp_dir_ims = '/data_store/datasets/people/uccs/dataset/media/original'
fp_out_exif = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif.csv'
fp_out_exif_test = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif_test.csv'

# %%
fp_ims = glob(join(fp_dir_ims, '*.jpg'))
print(len(fp_ims))  # the recorded notebook run printed 10917


# %%
def _rational_to_float(value):
    """Return an EXIF rational value as a float.

    Args:
        value: Either a ``(numerator, denominator)`` pair or any object that
            ``float()`` accepts.

    Returns:
        ``numerator / denominator`` for a pair, otherwise ``float(value)``.

    Raises:
        ZeroDivisionError: If a pair has a zero denominator.
    """
    try:
        numerator, denominator = value
    except TypeError:
        # Not an iterable pair.
        # NOTE(review): newer Pillow releases are expected to hand back
        # number-like rational objects here -- confirm against the installed
        # Pillow version.
        return float(value)
    return numerator / denominator


def get_exif(fp_im, raw=False):
    """Read the EXIF metadata of one image file.

    Args:
        fp_im: Path of the image file to open with PIL.
        raw: When true, return every decoded EXIF tag unchanged.

    Returns:
        If ``raw`` is true, a dict mapping EXIF tag names (or the numeric tag
        id when ``ExifTags.TAGS`` has no name for it) to their raw values;
        the dict is empty when the image carries no EXIF data.
        Otherwise a dict with the keys ``date_time``, ``aperture``,
        ``fnumber``, ``focal_length``, ``exposure_program``,
        ``exposure_mode`` and ``iso``.

    Raises:
        KeyError: If ``raw`` is false and one of the tags needed for the
            summary dict is missing.
    """
    # The context manager closes the file handle; the notebook calls this
    # function once per image, so unclosed handles would pile up.
    with Image.open(fp_im) as im:
        # _getexif() returns None for images without EXIF data.
        exif_raw = im._getexif() or {}

    exif_data = {ExifTags.TAGS.get(tag, tag): value
                 for tag, value in exif_raw.items()}
    if raw:
        return exif_data

    # Rewrite only the date part from 'YYYY:MM:DD' to 'YYYY-MM-DD' so the
    # combined string can be parsed by pd.to_datetime further down.
    date_times = exif_data['DateTime'].split(' ')
    date_time = f"{date_times[0].replace(':', '-')} {date_times[1]}"

    return {
        'date_time': date_time,
        # Numerator over denominator. The previous code divided the numerator
        # by itself, which is why the recorded run showed aperture == 1.0 in
        # every displayed row.
        'aperture': _rational_to_float(exif_data['ApertureValue']),
        'fnumber': _rational_to_float(exif_data['FNumber']),
        'focal_length': int(_rational_to_float(exif_data['FocalLength'])),
        'exposure_program': exif_data['ExposureProgram'],
        'exposure_mode': exif_data['ExposureMode'],
        'iso': int(exif_data['ISOSpeedRatings']),
    }


# %%
# One summary dict per image, tagged with the image's file name.
exif_data = []
for fp_im in tqdm(fp_ims):
    im_exif = get_exif(fp_im)
    im_exif['filename'] = Path(fp_im).name
    exif_data.append(im_exif)

# %%
# Add separate 'date' and 'time' fields; 'date' is the grouping key below.
for ex in exif_data:
    splits = ex['date_time'].split(' ')
    ex['date'] = splits[0]
    ex['time'] = splits[1]

# %%
df_exif = pd.DataFrame(exif_data)
df_exif['date_time'] = pd.to_datetime(df_exif['date_time'])
df_exif.to_csv(fp_out_exif, index=False)

# %%
df_exif.head()

# %%
exif_dates = df_exif.groupby('date')

# %%
# The recorded notebook run listed 18 distinct dates, from 2012-02-23 to
# 2013-02-26.
dates = list(exif_dates.groups)
print(len(dates))
pprint(dates)

# %%
# Iterating a GroupBy yields (key, sub-frame) tuples. Unpack the tuple so
# that .index is the sub-frame's row index rather than tuple.index, which is
# what the recorded run printed.
for exif_date, df_date in exif_dates:
    print(df_date.index)
    # NOTE(review): the notebook stored the whole (key, frame) tuple in idx;
    # the frame looks like what was intended -- confirm.
    idx = df_date
    break

# %%
type(idx.count)
