summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/uccs/uccs_exif.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/uccs/uccs_exif.ipynb')
-rw-r--r--megapixels/notebooks/datasets/uccs/uccs_exif.ipynb421
1 files changed, 421 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/uccs/uccs_exif.ipynb b/megapixels/notebooks/datasets/uccs/uccs_exif.ipynb
new file mode 100644
index 00000000..c4d37b39
--- /dev/null
+++ b/megapixels/notebooks/datasets/uccs/uccs_exif.ipynb
@@ -0,0 +1,421 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# UCCS Exif\n",
+ "\n",
+ "- read in all images and extract metadata\n",
+ "- export to CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "from os.path import join\n",
+ "from glob import glob\n",
+ "from pprint import pprint\n",
+ "\n",
+ "import cv2 as cv\n",
+ "import pandas as pd\n",
+ "from PIL import Image, ImageDraw, ExifTags\n",
+ "\n",
+ "from pathlib import Path\n",
+ "from tqdm import tqdm_notebook as tqdm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fp_dir_uccs = '/data_store/datasets/people/uccs/dataset'\n",
+ "fp_dir_ims = '/data_store/datasets/people/uccs/dataset/media/original'\n",
+ "fp_out_exif = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif.csv'\n",
+ "fp_out_exif_test = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif_test.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10917\n"
+ ]
+ }
+ ],
+ "source": [
+ "fp_ims = glob(join(fp_dir_ims, '*.jpg'))\n",
+ "print(len(fp_ims))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _exif_ratio(value):\n",
+ "    \"\"\"Convert an EXIF rational value to a float.\n",
+ "\n",
+ "    Accepts either a ``(numerator, denominator)`` tuple or any value that\n",
+ "    ``float()`` can convert directly.\n",
+ "    \"\"\"\n",
+ "    if isinstance(value, tuple):\n",
+ "        return value[0] / value[1]\n",
+ "    return float(value)\n",
+ "\n",
+ "\n",
+ "def get_exif(fp_im, raw=False):\n",
+ "    \"\"\"Extract EXIF metadata from an image file.\n",
+ "\n",
+ "    Args:\n",
+ "        fp_im: path to the image file.\n",
+ "        raw: if True, return every EXIF tag keyed by its decoded name\n",
+ "            (or by the numeric tag id when ``ExifTags.TAGS`` has no name).\n",
+ "\n",
+ "    Returns:\n",
+ "        The full decoded tag dict when ``raw`` is True; otherwise a dict with\n",
+ "        the keys ``date_time``, ``aperture``, ``fnumber``, ``focal_length``,\n",
+ "        ``exposure_program``, ``exposure_mode`` and ``iso``.\n",
+ "\n",
+ "    Raises:\n",
+ "        KeyError: if ``raw`` is False and one of the exported tags is missing.\n",
+ "    \"\"\"\n",
+ "    # Close the file handle as soon as the EXIF block has been read\n",
+ "    with Image.open(fp_im) as im:\n",
+ "        # Treat a missing EXIF block (None) as an empty tag set\n",
+ "        exif_raw = im._getexif() or {}\n",
+ "    exif_data = {ExifTags.TAGS.get(tag, tag): value for tag, value in exif_raw.items()}\n",
+ "    if raw:\n",
+ "        return exif_data\n",
+ "    # Rewrite the date part with '-' separators so pandas can parse the result\n",
+ "    date_times = exif_data['DateTime'].split(' ')\n",
+ "    date_time = date_times[0].replace(':', '-') + ' ' + date_times[1]\n",
+ "    return {\n",
+ "        'date_time': date_time,\n",
+ "        # numerator / denominator (previously divided the numerator by itself)\n",
+ "        'aperture': _exif_ratio(exif_data['ApertureValue']),\n",
+ "        'fnumber': _exif_ratio(exif_data['FNumber']),\n",
+ "        'focal_length': int(_exif_ratio(exif_data['FocalLength'])),\n",
+ "        'exposure_program': exif_data['ExposureProgram'],\n",
+ "        'exposure_mode': exif_data['ExposureMode'],\n",
+ "        'iso': int(exif_data['ISOSpeedRatings'])\n",
+ "    }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "285797b2b773435aba4a89740ddd38e4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=10917), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Collect one EXIF record per image, tagged with the image's file name\n",
+ "exif_data = [\n",
+ "    dict(get_exif(fp_im), filename=Path(fp_im).name)\n",
+ "    for fp_im in tqdm(fp_ims)\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add separate 'date' and 'time' fields split out of each record's 'date_time'\n",
+ "for record in exif_data:\n",
+ "    parts = record['date_time'].split(' ')\n",
+ "    record.update(date=parts[0], time=parts[1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the table, parse the timestamp column, then write the CSV\n",
+ "df_exif = pd.DataFrame(exif_data).assign(\n",
+ "    date_time=lambda frame: pd.to_datetime(frame['date_time'])\n",
+ ")\n",
+ "df_exif.to_csv(fp_out_exif, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>aperture</th>\n",
+ " <th>date</th>\n",
+ " <th>date_time</th>\n",
+ " <th>exposure_mode</th>\n",
+ " <th>exposure_program</th>\n",
+ " <th>filename</th>\n",
+ " <th>fnumber</th>\n",
+ " <th>focal_length</th>\n",
+ " <th>iso</th>\n",
+ " <th>time</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>1.0</td>\n",
+ " <td>2013-01-29</td>\n",
+ " <td>2013-01-29 12:07:45</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>e87a43c32cc697d3e6b40be3e3594057.jpg</td>\n",
+ " <td>5.6</td>\n",
+ " <td>800</td>\n",
+ " <td>100</td>\n",
+ " <td>12:07:45</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>1.0</td>\n",
+ " <td>2012-04-03</td>\n",
+ " <td>2012-04-03 11:07:53</td>\n",
+ " <td>0</td>\n",
+ " <td>3</td>\n",
+ " <td>9d15290fdd811d5cbaeb44448a4b54d3.jpg</td>\n",
+ " <td>5.6</td>\n",
+ " <td>800</td>\n",
+ " <td>400</td>\n",
+ " <td>11:07:53</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>1.0</td>\n",
+ " <td>2013-01-29</td>\n",
+ " <td>2013-01-29 12:08:01</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>d9cad73c2f47022195169e07f21dc567.jpg</td>\n",
+ " <td>5.6</td>\n",
+ " <td>800</td>\n",
+ " <td>100</td>\n",
+ " <td>12:08:01</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>1.0</td>\n",
+ " <td>2013-02-20</td>\n",
+ " <td>2013-02-20 12:16:35</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>decf44da0b963a33c88362e613878820.jpg</td>\n",
+ " <td>5.6</td>\n",
+ " <td>800</td>\n",
+ " <td>160</td>\n",
+ " <td>12:16:35</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>1.0</td>\n",
+ " <td>2013-02-19</td>\n",
+ " <td>2013-02-19 16:30:51</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>4a59b6b9b50cf6fc87e45caa0fdb86df.jpg</td>\n",
+ " <td>5.6</td>\n",
+ " <td>800</td>\n",
+ " <td>400</td>\n",
+ " <td>16:30:51</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " aperture date date_time exposure_mode exposure_program \\\n",
+ "0 1.0 2013-01-29 2013-01-29 12:07:45 1 1 \n",
+ "1 1.0 2012-04-03 2012-04-03 11:07:53 0 3 \n",
+ "2 1.0 2013-01-29 2013-01-29 12:08:01 1 1 \n",
+ "3 1.0 2013-02-20 2013-02-20 12:16:35 1 1 \n",
+ "4 1.0 2013-02-19 2013-02-19 16:30:51 1 1 \n",
+ "\n",
+ " filename fnumber focal_length iso time \n",
+ "0 e87a43c32cc697d3e6b40be3e3594057.jpg 5.6 800 100 12:07:45 \n",
+ "1 9d15290fdd811d5cbaeb44448a4b54d3.jpg 5.6 800 400 11:07:53 \n",
+ "2 d9cad73c2f47022195169e07f21dc567.jpg 5.6 800 100 12:08:01 \n",
+ "3 decf44da0b963a33c88362e613878820.jpg 5.6 800 160 12:16:35 \n",
+ "4 4a59b6b9b50cf6fc87e45caa0fdb86df.jpg 5.6 800 400 16:30:51 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_exif.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "exif_dates = df_exif.groupby('date')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "18\n",
+ "['2012-02-23',\n",
+ " '2012-03-06',\n",
+ " '2012-03-08',\n",
+ " '2012-03-13',\n",
+ " '2012-03-20',\n",
+ " '2012-03-22',\n",
+ " '2012-04-03',\n",
+ " '2012-04-12',\n",
+ " '2012-04-17',\n",
+ " '2012-04-24',\n",
+ " '2012-04-25',\n",
+ " '2012-04-26',\n",
+ " '2013-01-28',\n",
+ " '2013-01-29',\n",
+ " '2013-02-13',\n",
+ " '2013-02-19',\n",
+ " '2013-02-20',\n",
+ " '2013-02-26']\n"
+ ]
+ }
+ ],
+ "source": [
+ "dates = list(exif_dates.groups.keys())\n",
+ "print(len(dates))\n",
+ "pprint(dates)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<built-in method index of tuple object at 0x7fc507b54b48>\n"
+ ]
+ }
+ ],
+ "source": [
+ "for exif_date in exif_dates:\n",
+ " print(exif_date.index)\n",
+ " idx = exif_date\n",
+ " break"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "builtin_function_or_method"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(idx.count)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}