summaryrefslogtreecommitdiff
path: root/megapixels/notebooks/datasets/duke_mtmc/duke_mtmc_timestamps.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'megapixels/notebooks/datasets/duke_mtmc/duke_mtmc_timestamps.ipynb')
-rw-r--r--megapixels/notebooks/datasets/duke_mtmc/duke_mtmc_timestamps.ipynb213
1 files changed, 213 insertions, 0 deletions
diff --git a/megapixels/notebooks/datasets/duke_mtmc/duke_mtmc_timestamps.ipynb b/megapixels/notebooks/datasets/duke_mtmc/duke_mtmc_timestamps.ipynb
new file mode 100644
index 00000000..5179cc7c
--- /dev/null
+++ b/megapixels/notebooks/datasets/duke_mtmc/duke_mtmc_timestamps.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Duke MTMC Timestamps\n",
+ "\n",
+ "- use file modification times plus pymediainfo durations to compute each video's start/end timestamps\n",
+ "- save data to CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "import os\n",
+ "from os.path import join\n",
+ "import math\n",
+ "import time\n",
+ "from glob import glob\n",
+ "import datetime\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pathlib import Path\n",
+ "from tqdm import tqdm_notebook as tqdm\n",
+ "from pymediainfo import MediaInfo"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Directory that is searched recursively for the .MTS video files\n",
+ "fp_dir_videos = '/data_store/datasets/people/duke_mtmc/dataset/videos/'\n",
+ "# Destination CSV for the per-video metadata table\n",
+ "fp_times = '/data_store/datasets/people/duke_mtmc/processed/video_times.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sort the matches: glob returns paths in arbitrary, filesystem-dependent order,\n",
+ "# and the row order of the saved CSV follows the order of this list\n",
+ "fps_video = sorted(glob(join(fp_dir_videos, '**/*.MTS'), recursive=True))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def mediainfo(fp_in, raw=False):\n",
+ "    \"\"\"Get media info using pymediainfo.\n",
+ "\n",
+ "    :param fp_in: (str) filepath of the media file to parse\n",
+ "    :param raw: (bool) if True, return the unmodified pymediainfo track dicts\n",
+ "        under 'video' and 'audio'; if False, return only a reduced, typed\n",
+ "        summary of the video track under 'video'\n",
+ "    :returns: (dict) track info; empty when the file has no matching track\n",
+ "    :raises KeyError: if a video track lacks width, height, frame_rate or frame_count\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def _number(value, cast):\n",
+ "        # Values may be absent or numeric strings (possibly fractional, which a bare\n",
+ "        # int() rejects); parse through float and map absent values to None\n",
+ "        if value is None or value == '':\n",
+ "            return None\n",
+ "        return cast(float(value))\n",
+ "\n",
+ "    tracks = MediaInfo.parse(fp_in).to_data()['tracks']\n",
+ "    media_info = {}\n",
+ "\n",
+ "    for d in tracks:\n",
+ "        track_type = d['track_type']\n",
+ "        if raw:\n",
+ "            # raw mode: pass the track dicts through untouched\n",
+ "            if track_type == 'Video':\n",
+ "                media_info['video'] = d\n",
+ "            elif track_type == 'Audio':\n",
+ "                media_info['audio'] = d\n",
+ "        elif track_type == 'Video':\n",
+ "            media_info['video'] = {\n",
+ "                'codec_cc': d.get('codec_cc', ''),\n",
+ "                # optional fields: None when the track does not report them\n",
+ "                'duration': _number(d.get('duration'), int),\n",
+ "                'display_aspect_ratio': _number(d.get('display_aspect_ratio'), float),\n",
+ "                # required fields: a missing key still raises KeyError\n",
+ "                'width': _number(d['width'], int),\n",
+ "                'height': _number(d['height'], int),\n",
+ "                'frame_rate': _number(d['frame_rate'], float),\n",
+ "                'frame_count': _number(d['frame_count'], int),\n",
+ "            }\n",
+ "\n",
+ "    return media_info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "87\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Sanity check: how many video files the glob found\n",
+ "num_videos = len(fps_video)\n",
+ "print(num_videos)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def modification_date(fp):\n",
+ "    \"\"\"Return a file's last-modified time as a naive datetime.\n",
+ "\n",
+ "    :param fp: (str) filepath to inspect\n",
+ "    :returns: (datetime.datetime) os.path.getmtime(fp) converted via fromtimestamp\n",
+ "    \"\"\"\n",
+ "    return datetime.datetime.fromtimestamp(os.path.getmtime(fp))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9cb4c25594cc44d9995da82392acca0a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "HBox(children=(IntProgress(value=0, max=87), HTML(value='')))"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Build one metadata record per video: file timestamps plus video-track details\n",
+ "meta = []\n",
+ "for fp_video in tqdm(fps_video):\n",
+ "    video_path = Path(fp_video)\n",
+ "    # NOTE(review): the file modification time is treated as the recording start - confirm\n",
+ "    time_start = modification_date(fp_video)\n",
+ "    # assumes the parent directory name ends in a single-digit camera id - TODO confirm\n",
+ "    camera = int(video_path.parent.name[-1])\n",
+ "    video = mediainfo(fp_video).get('video')\n",
+ "    if not video or video.get('duration') is None:\n",
+ "        # name the offending file instead of failing with an opaque error on None\n",
+ "        raise ValueError(f'No video track duration found in: {fp_video}')\n",
+ "    duration = video['duration']  # ms\n",
+ "    minutes = duration / 1000 / 60\n",
+ "    # keep millisecond precision so time_end - time_start equals the stored duration\n",
+ "    time_end = time_start + datetime.timedelta(milliseconds=duration)\n",
+ "    meta.append(\n",
+ "        {\n",
+ "            'fn': video_path.name,\n",
+ "            'camera': camera,\n",
+ "            'time_start': str(time_start),\n",
+ "            'time_end': str(time_end),\n",
+ "            'duration': duration, # ms\n",
+ "            'frame_count': video.get('frame_count'),\n",
+ "            'frame_rate': video.get('frame_rate'),\n",
+ "            'width': video.get('width'),\n",
+ "            'height': video.get('height'),\n",
+ "            'minutes': f'{minutes:.3f}',\n",
+ "        })"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tabulate the per-video records, then persist them without the index column\n",
+ "df_meta = pd.DataFrame.from_dict(meta)\n",
+ "df_meta.to_csv(path_or_buf=fp_times, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total minutes: 888.7956166666667\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Combined length of all videos; the 'duration' column is in milliseconds\n",
+ "duration_ms_total = df_meta['duration'].sum()\n",
+ "print('Total minutes:', duration_ms_total/1000/60)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "megapixels",
+ "language": "python",
+ "name": "megapixels"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}