# Duke MTMC Timestamps

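- take each video's start timestamp from its file modification time, and its duration and frame metadata from pymediainfo
- save the per-video metadata to CSV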

In [62]:
%reload_ext autoreload
%autoreload 2

import os
from os.path import join
import math
import time
from glob import glob
import datetime

import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm_notebook as tqdm
from pymediainfo import MediaInfo

In [44]:
# Input: root folder that is searched recursively for the .MTS video files
fp_dir_videos = '/data_store/datasets/people/duke_mtmc/dataset/videos/'
# Output: CSV file that receives one row of timing metadata per video
fp_times = '/data_store/datasets/people/duke_mtmc/processed/video_times.csv'

In [18]:
# Recursively collect every .MTS file below the video folder.
# glob makes no ordering guarantee, so sort the paths to keep the processing
# order (and therefore the row order of the exported CSV) reproducible.
fps_video = sorted(glob(join(fp_dir_videos, '**/*.MTS'), recursive=True))

In [12]:
def mediainfo(fp_in, raw=False):
  """Get media info for a file using pymediainfo.

  Args:
    fp_in: path of the media file passed to `MediaInfo.parse`.
    raw: if True, return the unmodified track dicts under the keys 'video'
      and 'audio'. If False (default), return only a 'video' entry holding
      a small set of type-converted fields.

  Returns:
    dict with at most the keys 'video' and 'audio' ('audio' only when `raw`
    is True). A key is absent when no track of that type is found; when
    several tracks share a type, the last one wins.
    In the non-raw form 'duration' and 'display_aspect_ratio' are None when
    the track does not report them, and 'codec_cc' falls back to ''.

  Raises:
    KeyError: non-raw mode and the video track lacks 'width', 'height',
      'frame_rate' or 'frame_count'.
    ValueError: a reported field cannot be converted to a number.
  """

  def _cast_or_none(value, cast):
    """Apply `cast` to `value`; a missing or empty value becomes None."""
    # Previously a missing field defaulted to '' and int('')/float('') raised
    # an uninformative ValueError; None lets callers detect the gap instead.
    if value is None or value == '':
      return None
    return cast(value)

  tracks = MediaInfo.parse(fp_in).to_data()['tracks']
  media_info = {}

  for d in tracks:
    track_type = d['track_type']
    if raw:
      # raw mode: hand back the track dicts untouched
      if track_type == 'Video':
        media_info['video'] = d
      elif track_type == 'Audio':
        media_info['audio'] = d
    elif track_type == 'Video':
      media_info['video'] = {
        'codec_cc': d.get('codec_cc', ''),
        'duration': _cast_or_none(d.get('duration'), int),
        'display_aspect_ratio': _cast_or_none(d.get('display_aspect_ratio'), float),
        'width': int(d['width']),
        'height': int(d['height']),
        'frame_rate': float(d['frame_rate']),
        'frame_count': int(d['frame_count']),
        }

  return media_info

In [19]:
# Sanity check: number of .MTS files that were discovered
n_videos = len(fps_video)
print(n_videos)

87


In [63]:
def modification_date(fp):
  """Return the last-modification time of `fp` as a naive local-time datetime."""
  mtime_epoch = os.stat(fp).st_mtime  # seconds since the epoch
  return datetime.datetime.fromtimestamp(mtime_epoch)

In [89]:
# Build one metadata record per video: start/end time, duration and frame info
meta = []
for fp_video in tqdm(fps_video):
  # The file's modification time is used as the start time of the video
  time_start = modification_date(fp_video)
  # Camera id is the last character of the parent folder name, so only
  # single-digit ids are supported
  camera = int(Path(fp_video).parent.name[-1])
  fn = Path(fp_video).name
  m = mediainfo(fp_video).get('video')
  if m is None or m.get('duration') is None:
    # No video track or no duration reported: skip this file and say so,
    # instead of aborting the whole batch on `None.get(...)` / `int(None)`
    print(f'Skipping {fp_video}: no video track or duration found')
    continue
  duration = int(m.get('duration'))  # ms
  minutes = duration / 1000 / 60
  # Add the duration at millisecond precision; flooring it to whole seconds
  # would shift the end time by up to 999 ms
  time_end = time_start + datetime.timedelta(milliseconds=duration)
  meta.append(
    {
      'fn': fn,
      'camera': camera,
      'time_start': str(time_start),
      'time_end': str(time_end),
      'duration': duration, # ms
      'frame_count': m.get('frame_count'),
      'frame_rate': m.get('frame_rate'),
      'width': m.get('width'),
      'height': m.get('height'),
      'minutes': f'{minutes:.3f}',
    })

HBox(children=(IntProgress(value=0, max=87), HTML(value='')))

In [90]:
# Turn the list of per-video records into a table (one row per video)
df_meta = pd.DataFrame(meta)
# Make sure the output folder exists so the export cannot fail on a missing directory
Path(fp_times).parent.mkdir(parents=True, exist_ok=True)
df_meta.to_csv(fp_times, index=False)

In [94]:
# Total footage over all videos: summed duration (ms) converted to minutes
total_ms = df_meta['duration'].sum()
print('Total minutes:', total_ms/1000/60)

Total minutes: 888.7956166666667
