{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# UCCS Exif\n", "\n", "- read in all images and extract metadata\n", "- export to CSV" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "from os.path import join\n", "from glob import glob\n", "from pprint import pprint\n", "\n", "import cv2 as cv\n", "import pandas as pd\n", "from PIL import Image, ImageDraw, ExifTags\n", "\n", "from pathlib import Path\n", "from tqdm import tqdm_notebook as tqdm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "fp_dir_uccs = '/data_store/datasets/people/uccs/dataset'\n", "fp_dir_ims = '/data_store/datasets/people/uccs/dataset/media/original'\n", "fp_out_exif = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif.csv'\n", "fp_out_exif_test = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif_test.csv'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10917\n" ] } ], "source": [ "fp_ims = glob(join(fp_dir_ims, '*.jpg'))\n", "print(len(fp_ims))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def get_exif(fp_im, raw=False):\n", " im = Image.open(fp_im)\n", " exif_raw = im._getexif() \n", " exif_data = {}\n", " for tag, value in exif_raw.items():\n", " decoded = ExifTags.TAGS.get(tag, tag)\n", " exif_data[decoded] = value\n", " if raw:\n", " return exif_data\n", " else:\n", " im_w = exif_data['ExifImageWidth']\n", " im_h = exif_data['ExifImageHeight']\n", " #date_time_digitized = exif_data['DateTimeDigitized']\n", " #date_time_original = exif_data['DateTimeOriginal']\n", " date_times = exif_data['DateTime'].split(' ')\n", " date_time = date_times[0].replace(':', '-') + ' ' + date_times[1]\n", " copyright = exif_data['Copyright']\n", " aperture_value = exif_data['ApertureValue'][0] / exif_data['ApertureValue'][0]\n", " fnumber = exif_data['FNumber'][0] / exif_data['FNumber'][1]\n", " focal_length = int(exif_data['FocalLength'][0] / exif_data['FocalLength'][1])\n", " iso = int(exif_data['ISOSpeedRatings'])\n", " exposure_mode = exif_data['ExposureMode']\n", " exposure_program = exif_data['ExposureProgram']\n", " #focal_plane_x = exif_data['FocalPlaneXResolution'][0] / exif_data['FocalPlaneXResolution'][1]\n", " #focal_plane_y = exif_data['FocalPlaneYResolution'][0] / exif_data['FocalPlaneYResolution'][1]\n", " result = {\n", " # 'image_width': im_w,\n", " # 'image_height': im_h,\n", " 'date_time': date_time,\n", " #'date_time_digitized': date_time_digitized,\n", " #'date_time_original': date_time_original,\n", " #'copyright': copyright,\n", " 'aperture': aperture_value,\n", " 'fnumber': fnumber,\n", " 'focal_length': focal_length,\n", " 'exposure_program': exposure_program,\n", " 'exposure_mode': exposure_mode,\n", " 'iso': iso\n", " }\n", " return result\n", " " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "285797b2b773435aba4a89740ddd38e4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, max=10917), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "exif_data = []\n", "for fp_im in tqdm(fp_ims):\n", " im_exif = get_exif(fp_im)\n", " im_exif.update({'filename': Path(fp_im).name})\n", " exif_data.append(im_exif)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "for ex in exif_data:\n", " #ex['focal_length'] = int(ex['focal_length'])\n", " splits = ex['date_time'].split(' ')\n", " ex['date'] = splits[0]\n", " ex['time'] = splits[1]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "df_exif = pd.DataFrame.from_dict(exif_data)\n", "df_exif['date_time'] = pd.to_datetime(df_exif['date_time'])\n", "#df_exif.to_csv(fp_out_exif, index=False)\n", "df_exif.to_csv(fp_out_exif, index=False)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | aperture | \n", "date | \n", "date_time | \n", "exposure_mode | \n", "exposure_program | \n", "filename | \n", "fnumber | \n", "focal_length | \n", "iso | \n", "time | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1.0 | \n", "2013-01-29 | \n", "2013-01-29 12:07:45 | \n", "1 | \n", "1 | \n", "e87a43c32cc697d3e6b40be3e3594057.jpg | \n", "5.6 | \n", "800 | \n", "100 | \n", "12:07:45 | \n", "
| 1 | \n", "1.0 | \n", "2012-04-03 | \n", "2012-04-03 11:07:53 | \n", "0 | \n", "3 | \n", "9d15290fdd811d5cbaeb44448a4b54d3.jpg | \n", "5.6 | \n", "800 | \n", "400 | \n", "11:07:53 | \n", "
| 2 | \n", "1.0 | \n", "2013-01-29 | \n", "2013-01-29 12:08:01 | \n", "1 | \n", "1 | \n", "d9cad73c2f47022195169e07f21dc567.jpg | \n", "5.6 | \n", "800 | \n", "100 | \n", "12:08:01 | \n", "
| 3 | \n", "1.0 | \n", "2013-02-20 | \n", "2013-02-20 12:16:35 | \n", "1 | \n", "1 | \n", "decf44da0b963a33c88362e613878820.jpg | \n", "5.6 | \n", "800 | \n", "160 | \n", "12:16:35 | \n", "
| 4 | \n", "1.0 | \n", "2013-02-19 | \n", "2013-02-19 16:30:51 | \n", "1 | \n", "1 | \n", "4a59b6b9b50cf6fc87e45caa0fdb86df.jpg | \n", "5.6 | \n", "800 | \n", "400 | \n", "16:30:51 | \n", "