# UCCS Exif

- read in all images and extract metadata
- export to CSV

In [1]:
%reload_ext autoreload
%autoreload 2

from os.path import join
from glob import glob
from pprint import pprint

import cv2 as cv
import pandas as pd
from PIL import Image, ImageDraw, ExifTags

from pathlib import Path
from tqdm import tqdm_notebook as tqdm

In [2]:
fp_dir_uccs = '/data_store/datasets/people/uccs/dataset'
fp_dir_ims = '/data_store/datasets/people/uccs/dataset/media/original'
fp_out_exif = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif.csv'
fp_out_exif_test = '/data_store/datasets/people/uccs/processed/exif/uccs_camera_exif_test.csv'

In [3]:
fp_ims = glob(join(fp_dir_ims, '*.jpg'))
print(len(fp_ims))

10917


In [6]:
def get_exif(fp_im, raw=False):
  im = Image.open(fp_im)
  exif_raw = im._getexif()  
  exif_data = {}
  for tag, value in exif_raw.items():
    decoded = ExifTags.TAGS.get(tag, tag)
    exif_data[decoded] = value
  if raw:
    return exif_data
  else:
    im_w = exif_data['ExifImageWidth']
    im_h = exif_data['ExifImageHeight']
    #date_time_digitized = exif_data['DateTimeDigitized']
    #date_time_original = exif_data['DateTimeOriginal']
    date_times = exif_data['DateTime'].split(' ')
    date_time = date_times[0].replace(':', '-') + ' ' + date_times[1]
    copyright = exif_data['Copyright']
    aperture_value = exif_data['ApertureValue'][0] / exif_data['ApertureValue'][0]
    fnumber = exif_data['FNumber'][0] / exif_data['FNumber'][1]
    focal_length = int(exif_data['FocalLength'][0] / exif_data['FocalLength'][1])
    iso = int(exif_data['ISOSpeedRatings'])
    exposure_mode = exif_data['ExposureMode']
    exposure_program = exif_data['ExposureProgram']
    #focal_plane_x = exif_data['FocalPlaneXResolution'][0] / exif_data['FocalPlaneXResolution'][1]
    #focal_plane_y = exif_data['FocalPlaneYResolution'][0] / exif_data['FocalPlaneYResolution'][1]
    result = {
  #     'image_width': im_w,
  #     'image_height': im_h,
      'date_time': date_time,
      #'date_time_digitized': date_time_digitized,
      #'date_time_original': date_time_original,
      #'copyright': copyright,
      'aperture': aperture_value,
      'fnumber': fnumber,
      'focal_length': focal_length,
      'exposure_program': exposure_program,
      'exposure_mode': exposure_mode,
      'iso': iso
    }
    return result
    

In [7]:
exif_data = []
for fp_im in tqdm(fp_ims):
  im_exif = get_exif(fp_im)
  im_exif.update({'filename': Path(fp_im).name})
  exif_data.append(im_exif)

HBox(children=(IntProgress(value=0, max=10917), HTML(value='')))




In [10]:
for ex in exif_data:
  #ex['focal_length'] = int(ex['focal_length'])
  splits = ex['date_time'].split(' ')
  ex['date'] = splits[0]
  ex['time'] = splits[1]

In [11]:
df_exif = pd.DataFrame.from_dict(exif_data)
df_exif['date_time'] = pd.to_datetime(df_exif['date_time'])
#df_exif.to_csv(fp_out_exif, index=False)
df_exif.to_csv(fp_out_exif, index=False)

In [12]:
df_exif.head()

Unnamed: 0,aperture,date,date_time,exposure_mode,exposure_program,filename,fnumber,focal_length,iso,time
0,1.0,2013-01-29,2013-01-29 12:07:45,1,1,e87a43c32cc697d3e6b40be3e3594057.jpg,5.6,800,100,12:07:45
1,1.0,2012-04-03,2012-04-03 11:07:53,0,3,9d15290fdd811d5cbaeb44448a4b54d3.jpg,5.6,800,400,11:07:53
2,1.0,2013-01-29,2013-01-29 12:08:01,1,1,d9cad73c2f47022195169e07f21dc567.jpg,5.6,800,100,12:08:01
3,1.0,2013-02-20,2013-02-20 12:16:35,1,1,decf44da0b963a33c88362e613878820.jpg,5.6,800,160,12:16:35
4,1.0,2013-02-19,2013-02-19 16:30:51,1,1,4a59b6b9b50cf6fc87e45caa0fdb86df.jpg,5.6,800,400,16:30:51


In [13]:
exif_dates = df_exif.groupby('date')

In [36]:
dates = list(exif_dates.groups.keys())
print(len(dates))
pprint(dates)

18
['2012-02-23',
 '2012-03-06',
 '2012-03-08',
 '2012-03-13',
 '2012-03-20',
 '2012-03-22',
 '2012-04-03',
 '2012-04-12',
 '2012-04-17',
 '2012-04-24',
 '2012-04-25',
 '2012-04-26',
 '2013-01-28',
 '2013-01-29',
 '2013-02-13',
 '2013-02-19',
 '2013-02-20',
 '2013-02-26']


In [21]:
for exif_date in exif_dates:
  print(exif_date.index)
  idx = exif_date
  break

<built-in method index of tuple object at 0x7fc507b54b48>


In [31]:
type(idx.count)

builtin_function_or_method