{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# UCCS Exif\n", "\n", "- read in all images and extract metadata\n", "- export to CSV" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "from os.path import join\n", "from glob import glob\n", "from pprint import pprint\n", "\n", "import cv2 as cv\n", "import pandas as pd\n", "from PIL import Image, ImageDraw, ExifTags\n", "\n", "from pathlib import Path\n", "from tqdm import tqdm_notebook as tqdm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "fp_dir_uccs = '/data_store/datasets/people/uccs/dataset'\n", "fp_dir_ims = '/data_store/datasets/people/uccs/dataset/media/original'\n", "fp_out_exif = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif.csv'\n", "fp_out_exif_test = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif_test.csv'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10917\n" ] } ], "source": [ "fp_ims = glob(join(fp_dir_ims, '*.jpg'))\n", "print(len(fp_ims))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def get_exif(fp_im, raw=False):\n", " im = Image.open(fp_im)\n", " exif_raw = im._getexif() \n", " exif_data = {}\n", " for tag, value in exif_raw.items():\n", " decoded = ExifTags.TAGS.get(tag, tag)\n", " exif_data[decoded] = value\n", " if raw:\n", " return exif_data\n", " else:\n", " im_w = exif_data['ExifImageWidth']\n", " im_h = exif_data['ExifImageHeight']\n", " #date_time_digitized = exif_data['DateTimeDigitized']\n", " #date_time_original = exif_data['DateTimeOriginal']\n", " date_times = exif_data['DateTime'].split(' ')\n", " date_time = date_times[0].replace(':', '-') + ' ' + date_times[1]\n", " copyright = exif_data['Copyright']\n", " aperture_value = exif_data['ApertureValue'][0] / exif_data['ApertureValue'][0]\n", " fnumber = exif_data['FNumber'][0] / exif_data['FNumber'][1]\n", " focal_length = int(exif_data['FocalLength'][0] / exif_data['FocalLength'][1])\n", " iso = int(exif_data['ISOSpeedRatings'])\n", " exposure_mode = exif_data['ExposureMode']\n", " exposure_program = exif_data['ExposureProgram']\n", " #focal_plane_x = exif_data['FocalPlaneXResolution'][0] / exif_data['FocalPlaneXResolution'][1]\n", " #focal_plane_y = exif_data['FocalPlaneYResolution'][0] / exif_data['FocalPlaneYResolution'][1]\n", " result = {\n", " # 'image_width': im_w,\n", " # 'image_height': im_h,\n", " 'date_time': date_time,\n", " #'date_time_digitized': date_time_digitized,\n", " #'date_time_original': date_time_original,\n", " #'copyright': copyright,\n", " 'aperture': aperture_value,\n", " 'fnumber': fnumber,\n", " 'focal_length': focal_length,\n", " 'exposure_program': exposure_program,\n", " 'exposure_mode': exposure_mode,\n", " 'iso': iso\n", " }\n", " return result\n", " " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "285797b2b773435aba4a89740ddd38e4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=10917), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "exif_data = []\n", "for fp_im in tqdm(fp_ims):\n", " im_exif = get_exif(fp_im)\n", " im_exif.update({'filename': Path(fp_im).name})\n", " exif_data.append(im_exif)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "for ex in exif_data:\n", " #ex['focal_length'] = int(ex['focal_length'])\n", " splits = ex['date_time'].split(' ')\n", " ex['date'] = splits[0]\n", " ex['time'] = splits[1]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "df_exif = pd.DataFrame.from_dict(exif_data)\n", "df_exif['date_time'] = pd.to_datetime(df_exif['date_time'])\n", "#df_exif.to_csv(fp_out_exif, index=False)\n", "df_exif.to_csv(fp_out_exif, index=False)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
aperturedatedate_timeexposure_modeexposure_programfilenamefnumberfocal_lengthisotime
01.02013-01-292013-01-29 12:07:4511e87a43c32cc697d3e6b40be3e3594057.jpg5.680010012:07:45
11.02012-04-032012-04-03 11:07:53039d15290fdd811d5cbaeb44448a4b54d3.jpg5.680040011:07:53
21.02013-01-292013-01-29 12:08:0111d9cad73c2f47022195169e07f21dc567.jpg5.680010012:08:01
31.02013-02-202013-02-20 12:16:3511decf44da0b963a33c88362e613878820.jpg5.680016012:16:35
41.02013-02-192013-02-19 16:30:51114a59b6b9b50cf6fc87e45caa0fdb86df.jpg5.680040016:30:51
\n", "
" ], "text/plain": [ " aperture date date_time exposure_mode exposure_program \\\n", "0 1.0 2013-01-29 2013-01-29 12:07:45 1 1 \n", "1 1.0 2012-04-03 2012-04-03 11:07:53 0 3 \n", "2 1.0 2013-01-29 2013-01-29 12:08:01 1 1 \n", "3 1.0 2013-02-20 2013-02-20 12:16:35 1 1 \n", "4 1.0 2013-02-19 2013-02-19 16:30:51 1 1 \n", "\n", " filename fnumber focal_length iso time \n", "0 e87a43c32cc697d3e6b40be3e3594057.jpg 5.6 800 100 12:07:45 \n", "1 9d15290fdd811d5cbaeb44448a4b54d3.jpg 5.6 800 400 11:07:53 \n", "2 d9cad73c2f47022195169e07f21dc567.jpg 5.6 800 100 12:08:01 \n", "3 decf44da0b963a33c88362e613878820.jpg 5.6 800 160 12:16:35 \n", "4 4a59b6b9b50cf6fc87e45caa0fdb86df.jpg 5.6 800 400 16:30:51 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_exif.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "exif_dates = df_exif.groupby('date')" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "18\n", "['2012-02-23',\n", " '2012-03-06',\n", " '2012-03-08',\n", " '2012-03-13',\n", " '2012-03-20',\n", " '2012-03-22',\n", " '2012-04-03',\n", " '2012-04-12',\n", " '2012-04-17',\n", " '2012-04-24',\n", " '2012-04-25',\n", " '2012-04-26',\n", " '2013-01-28',\n", " '2013-01-29',\n", " '2013-02-13',\n", " '2013-02-19',\n", " '2013-02-20',\n", " '2013-02-26']\n" ] } ], "source": [ "dates = list(exif_dates.groups.keys())\n", "print(len(dates))\n", "pprint(dates)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "for exif_date in exif_dates:\n", " print(exif_date.index)\n", " idx = exif_date\n", " break" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "builtin_function_or_method" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(idx.count)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "megapixels", "language": "python", "name": "megapixels" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }